View Javadoc
1   ///////////////////////////////////////////////////////////////////////////////////////////////
2   // checkstyle: Checks Java source code and other text files for adherence to a set of rules.
3   // Copyright (C) 2001-2026 the original author or authors.
4   //
5   // This library is free software; you can redistribute it and/or
6   // modify it under the terms of the GNU Lesser General Public
7   // License as published by the Free Software Foundation; either
8   // version 2.1 of the License, or (at your option) any later version.
9   //
10  // This library is distributed in the hope that it will be useful,
11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  // Lesser General Public License for more details.
14  //
15  // You should have received a copy of the GNU Lesser General Public
16  // License along with this library; if not, write to the Free Software
17  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  ///////////////////////////////////////////////////////////////////////////////////////////////
19  
20  package com.puppycrawl.tools.checkstyle.checks.javadoc;
21  
22  import java.util.ArrayDeque;
23  import java.util.Arrays;
24  import java.util.Deque;
25  import java.util.List;
26  import java.util.Locale;
27  import java.util.Set;
28  import java.util.regex.Matcher;
29  import java.util.regex.Pattern;
30  
31  import com.puppycrawl.tools.checkstyle.JavadocDetailNodeParser;
32  import com.puppycrawl.tools.checkstyle.StatelessCheck;
33  import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
34  import com.puppycrawl.tools.checkstyle.api.DetailAST;
35  import com.puppycrawl.tools.checkstyle.api.FileContents;
36  import com.puppycrawl.tools.checkstyle.api.Scope;
37  import com.puppycrawl.tools.checkstyle.api.TextBlock;
38  import com.puppycrawl.tools.checkstyle.api.TokenTypes;
39  import com.puppycrawl.tools.checkstyle.utils.CheckUtil;
40  import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
41  import com.puppycrawl.tools.checkstyle.utils.ScopeUtil;
42  
43  /**
44   * <div>
45   * Validates Javadoc comments to help ensure they are well formed.
46   * </div>
47   *
48   * <p>
49   * The following checks are performed:
50   * </p>
51   * <ul>
52   * <li>
53   * Ensures the first sentence ends with proper punctuation
54   * (That is a period, question mark, or exclamation mark, by default).
55   * Note that this check is not applied to inline {@code @return} tags,
56   * because the Javadoc tools automatically appends a period to the end of the tag
57   * content.
58   * Javadoc automatically places the first sentence in the method summary
59   * table and index. Without proper punctuation the Javadoc may be malformed.
60   * All items eligible for the {@code {@inheritDoc}} tag are exempt from this
61   * requirement.
62   * </li>
63   * <li>
64   * Check text for Javadoc statements that do not have any description.
65   * This includes both completely empty Javadoc, and Javadoc with only tags
66   * such as {@code @param} and {@code @return}.
67   * </li>
68   * <li>
69   * Check text for incomplete HTML tags. Verifies that HTML tags have
70   * corresponding end tags and issues an "Unclosed HTML tag found:" error if not.
71   * An "Extra HTML tag found:" error is issued if an end tag is found without
72   * a previous open tag.
73   * </li>
74   * <li>
75   * Check that a package Javadoc comment is well-formed (as described above).
76   * </li>
77   * <li>
78   * Check for allowed HTML tags. The list of allowed HTML tags is
79   * "a", "abbr", "acronym", "address", "area", "b", "bdo", "big", "blockquote",
80   * "br", "caption", "cite", "code", "colgroup", "dd", "del", "dfn", "div", "dl",
81   * "dt", "em", "fieldset", "font", "h1", "h2", "h3", "h4", "h5", "h6", "hr",
82   * "i", "img", "ins", "kbd", "li", "ol", "p", "pre", "q", "samp", "small",
83   * "span", "strong", "sub", "sup", "table", "tbody", "td", "tfoot", "th",
84   * "thead", "tr", "tt", "u", "ul", "var".
85   * </li>
86   * </ul>
87   *
88   * <p>
89   * These checks were patterned after the checks made by the
90   * <a href="https://maven-doccheck.sourceforge.net">DocCheck</a> doclet
91   * available from Sun. Note: Original Sun's DocCheck tool does not exist anymore.
92   * </p>
93   *
94   * @since 3.2
95   */
96  @StatelessCheck
97  public class JavadocStyleCheck
98      extends AbstractCheck {
99  
100     /** Message property key for the Empty Javadoc message. */
101     public static final String MSG_EMPTY = "javadoc.empty";
102 
103     /** Message property key for the No Javadoc end of Sentence Period message. */
104     public static final String MSG_NO_PERIOD = "javadoc.noPeriod";
105 
106     /** Message property key for the Incomplete Tag message. */
107     public static final String MSG_INCOMPLETE_TAG = "javadoc.incompleteTag";
108 
109     /** Message property key for the Unclosed HTML message. */
110     public static final String MSG_UNCLOSED_HTML = JavadocDetailNodeParser.MSG_UNCLOSED_HTML_TAG;
111 
112     /** Message property key for the Extra HTML message. */
113     public static final String MSG_EXTRA_HTML = "javadoc.extraHtml";
114 
115     /** HTML tags that do not require a close tag. */
116     private static final Set<String> SINGLE_TAGS = Set.of(
117         "br", "li", "dt", "dd", "hr", "img", "p", "td", "tr", "th"
118     );
119 
120     /**
121      * HTML tags that are allowed in java docs.
122      * From <a href="https://www.w3schools.com/tags/default.asp">w3schools</a>:
123      * <br>
124      * The forms and structure tags are not allowed
125      */
126     private static final Set<String> ALLOWED_TAGS = Set.of(
127         "a", "abbr", "acronym", "address", "area", "b", "bdo", "big",
128         "blockquote", "br", "caption", "cite", "code", "colgroup", "dd",
129         "del", "dfn", "div", "dl", "dt", "em", "fieldset", "font", "h1",
130         "h2", "h3", "h4", "h5", "h6", "hr", "i", "img", "ins", "kbd",
131         "li", "ol", "p", "pre", "q", "samp", "small", "span", "strong",
132         "sub", "sup", "table", "tbody", "td", "tfoot", "th", "thead",
133         "tr", "tt", "u", "ul", "var"
134     );
135 
136     /** Specify the format for inline return Javadoc. */
137     private static final Pattern INLINE_RETURN_TAG_PATTERN =
138             Pattern.compile("\\{@return.*?}\\s*");
139 
140     /** Specify the format for first word in javadoc. */
141     private static final Pattern SENTENCE_SEPARATOR = Pattern.compile("\\.(?=\\s|$)");
142 
143     /** Specify the visibility scope where Javadoc comments are checked. */
144     private Scope scope = Scope.PRIVATE;
145 
146     /** Specify the visibility scope where Javadoc comments are not checked. */
147     private Scope excludeScope;
148 
149     /** Specify the format for matching the end of a sentence. */
150     private Pattern endOfSentenceFormat = Pattern.compile("([.?!][ \t\n\r\f<])|([.?!]$)");
151 
152     /**
153      * Control whether to check the first sentence for proper end of sentence.
154      */
155     private boolean checkFirstSentence = true;
156 
157     /**
158      * Control whether to check for incomplete HTML tags.
159      */
160     private boolean checkHtml = true;
161 
162     /**
163      * Control whether to check if the Javadoc is missing a describing text.
164      */
165     private boolean checkEmptyJavadoc;
166 
167     @Override
168     public int[] getDefaultTokens() {
169         return getAcceptableTokens();
170     }
171 
172     @Override
173     public int[] getAcceptableTokens() {
174         return new int[] {
175             TokenTypes.ANNOTATION_DEF,
176             TokenTypes.ANNOTATION_FIELD_DEF,
177             TokenTypes.CLASS_DEF,
178             TokenTypes.CTOR_DEF,
179             TokenTypes.ENUM_CONSTANT_DEF,
180             TokenTypes.ENUM_DEF,
181             TokenTypes.INTERFACE_DEF,
182             TokenTypes.METHOD_DEF,
183             TokenTypes.PACKAGE_DEF,
184             TokenTypes.VARIABLE_DEF,
185             TokenTypes.RECORD_DEF,
186             TokenTypes.COMPACT_CTOR_DEF,
187         };
188     }
189 
190     @Override
191     public int[] getRequiredTokens() {
192         return CommonUtil.EMPTY_INT_ARRAY;
193     }
194 
195     // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
196     @Override
197     @SuppressWarnings("deprecation")
198     public void visitToken(DetailAST ast) {
199         if (shouldCheck(ast)) {
200             final FileContents contents = getFileContents();
201             // Need to start searching for the comment before the annotations
202             // that may exist. Even if annotations are not defined on the
203             // package, the ANNOTATIONS AST is defined.
204             final TextBlock textBlock =
205                 contents.getJavadocBefore(ast.getFirstChild().getLineNo());
206 
207             checkComment(ast, textBlock);
208         }
209     }
210 
211     /**
212      * Whether we should check this node.
213      *
214      * @param ast a given node.
215      * @return whether we should check a given node.
216      */
217     private boolean shouldCheck(final DetailAST ast) {
218         boolean check = false;
219 
220         if (ast.getType() == TokenTypes.PACKAGE_DEF) {
221             check = CheckUtil.isPackageInfo(getFilePath());
222         }
223         else if (!ScopeUtil.isInCodeBlock(ast)) {
224             final Scope customScope = ScopeUtil.getScope(ast);
225             check = ScopeUtil.getSurroundingScope(ast)
226                     .map(surroundingScope -> {
227                         return customScope.isIn(scope)
228                                 && surroundingScope.isIn(scope)
229                                 && (excludeScope == null || !customScope.isIn(excludeScope)
230                                         || !surroundingScope.isIn(excludeScope));
231                     })
232                     .orElse(Boolean.FALSE);
233         }
234         return check;
235     }
236 
237     /**
238      * Performs the various checks against the Javadoc comment.
239      *
240      * @param ast the AST of the element being documented
241      * @param comment the source lines that make up the Javadoc comment.
242      *
243      * @see #checkFirstSentenceEnding(DetailAST, TextBlock)
244      * @see #checkHtmlTags(DetailAST, TextBlock)
245      */
246     private void checkComment(final DetailAST ast, final TextBlock comment) {
247         if (comment != null) {
248             if (checkFirstSentence) {
249                 checkFirstSentenceEnding(ast, comment);
250             }
251 
252             if (checkHtml) {
253                 checkHtmlTags(ast, comment);
254             }
255 
256             if (checkEmptyJavadoc) {
257                 checkJavadocIsNotEmpty(comment);
258             }
259         }
260     }
261 
262     /**
263      * Checks that the first sentence ends with proper punctuation.  This method
264      * uses a regular expression that checks for the presence of a period,
265      * question mark, or exclamation mark followed either by whitespace, an
266      * HTML element, or the end of string. This method ignores {_AT_inheritDoc}
267      * comments for TokenTypes that are valid for {_AT_inheritDoc}.
268      *
269      * @param ast the current node
270      * @param comment the source lines that make up the Javadoc comment.
271      */
272     private void checkFirstSentenceEnding(final DetailAST ast, TextBlock comment) {
273         final String commentText = getCommentText(comment.getText());
274         final boolean hasInLineReturnTag = Arrays.stream(SENTENCE_SEPARATOR.split(commentText))
275                 .findFirst()
276                 .map(INLINE_RETURN_TAG_PATTERN::matcher)
277                 .filter(Matcher::find)
278                 .isPresent();
279 
280         if (!hasInLineReturnTag
281             && !commentText.isEmpty()
282             && !endOfSentenceFormat.matcher(commentText).find()
283             && !(commentText.startsWith("{@inheritDoc}")
284             && JavadocTagInfo.INHERIT_DOC.isValidOn(ast))) {
285             log(comment.getStartLineNo(), MSG_NO_PERIOD);
286         }
287     }
288 
289     /**
290      * Checks that the Javadoc is not empty.
291      *
292      * @param comment the source lines that make up the Javadoc comment.
293      */
294     private void checkJavadocIsNotEmpty(TextBlock comment) {
295         final String commentText = getCommentText(comment.getText());
296 
297         if (commentText.isEmpty()) {
298             log(comment.getStartLineNo(), MSG_EMPTY);
299         }
300     }
301 
302     /**
303      * Returns the comment text from the Javadoc.
304      *
305      * @param comments the lines of Javadoc.
306      * @return a comment text String.
307      */
308     private static String getCommentText(String... comments) {
309         final StringBuilder builder = new StringBuilder(1024);
310         for (final String line : comments) {
311             final int textStart = findTextStart(line);
312 
313             if (textStart != -1) {
314                 if (line.charAt(textStart) == '@') {
315                     // we have found the tag section
316                     break;
317                 }
318                 builder.append(line.substring(textStart));
319                 trimTail(builder);
320                 builder.append('\n');
321             }
322         }
323 
324         return builder.toString().trim();
325     }
326 
327     /**
328      * Finds the index of the first non-whitespace character ignoring the
329      * Javadoc comment start and end strings (&#47;** and *&#47;) as well as any
330      * leading asterisk.
331      *
332      * @param line the Javadoc comment line of text to scan.
333      * @return the int index relative to 0 for the start of text
334      *         or -1 if not found.
335      */
336     private static int findTextStart(String line) {
337         int textStart = -1;
338         int index = 0;
339         while (index < line.length()) {
340             if (!Character.isWhitespace(line.charAt(index))) {
341                 if (line.regionMatches(index, "/**", 0, "/**".length())
342                     || line.regionMatches(index, "*/", 0, 2)) {
343                     index++;
344                 }
345                 else if (line.charAt(index) != '*') {
346                     textStart = index;
347                     break;
348                 }
349             }
350             index++;
351         }
352         return textStart;
353     }
354 
355     /**
356      * Trims any trailing whitespace or the end of Javadoc comment string.
357      *
358      * @param builder the StringBuilder to trim.
359      */
360     private static void trimTail(StringBuilder builder) {
361         int index = builder.length() - 1;
362         while (true) {
363             if (Character.isWhitespace(builder.charAt(index))) {
364                 builder.deleteCharAt(index);
365             }
366             else if (index > 0 && builder.charAt(index) == '/'
367                     && builder.charAt(index - 1) == '*') {
368                 builder.deleteCharAt(index);
369                 builder.deleteCharAt(index - 1);
370                 index--;
371                 while (builder.charAt(index - 1) == '*') {
372                     builder.deleteCharAt(index - 1);
373                     index--;
374                 }
375             }
376             else {
377                 break;
378             }
379             index--;
380         }
381     }
382 
383     /**
384      * Checks the comment for HTML tags that do not have a corresponding close
385      * tag or a close tag that has no previous open tag.  This code was
386      * primarily copied from the DocCheck checkHtml method.
387      *
388      * @param ast the node with the Javadoc
389      * @param comment the {@code TextBlock} which represents
390      *                 the Javadoc comment.
391      * @noinspection MethodWithMultipleReturnPoints
392      * @noinspectionreason MethodWithMultipleReturnPoints - check and method are
393      *      too complex to break apart
394      */
395     // -@cs[ReturnCount] Too complex to break apart.
396     private void checkHtmlTags(final DetailAST ast, final TextBlock comment) {
397         final int lineNo = comment.getStartLineNo();
398         final Deque<HtmlTag> htmlStack = new ArrayDeque<>();
399         final String[] text = comment.getText();
400 
401         final TagParser parser = new TagParser(text, lineNo);
402 
403         while (parser.hasNextTag()) {
404             final HtmlTag tag = parser.nextTag();
405 
406             if (tag.isIncompleteTag()) {
407                 log(tag.getLineNo(), MSG_INCOMPLETE_TAG,
408                     text[tag.getLineNo() - lineNo]);
409                 return;
410             }
411             if (tag.isClosedTag()) {
412                 // do nothing
413                 continue;
414             }
415             if (tag.isCloseTag()) {
416                 // We have found a close tag.
417                 if (isExtraHtml(tag.getId(), htmlStack)) {
418                     // No corresponding open tag was found on the stack.
419                     log(tag.getLineNo(),
420                         tag.getPosition(),
421                         MSG_EXTRA_HTML,
422                         tag.getText());
423                 }
424                 else {
425                     // See if there are any unclosed tags that were opened
426                     // after this one.
427                     checkUnclosedTags(htmlStack, tag.getId());
428                 }
429             }
430             else {
431                 // We only push html tags that are allowed
432                 if (isAllowedTag(tag)) {
433                     htmlStack.push(tag);
434                 }
435             }
436         }
437 
438         // Identify any tags left on the stack.
439         // Skip multiples, like <b>...<b>
440         String lastFound = "";
441         final List<String> typeParameters = CheckUtil.getTypeParameterNames(ast);
442         for (final HtmlTag htmlTag : htmlStack) {
443             if (!isSingleTag(htmlTag)
444                 && !htmlTag.getId().equals(lastFound)
445                 && !typeParameters.contains(htmlTag.getId())) {
446                 log(htmlTag.getLineNo(), htmlTag.getPosition(),
447                         MSG_UNCLOSED_HTML, htmlTag.getText());
448                 lastFound = htmlTag.getId();
449             }
450         }
451     }
452 
453     /**
454      * Checks to see if there are any unclosed tags on the stack.  The token
455      * represents a html tag that has been closed and has a corresponding open
456      * tag on the stack.  Any tags, except single tags, that were opened
457      * (pushed on the stack) after the token are missing a close.
458      *
459      * @param htmlStack the stack of opened HTML tags.
460      * @param token the current HTML tag name that has been closed.
461      */
462     private void checkUnclosedTags(Deque<HtmlTag> htmlStack, String token) {
463         final Deque<HtmlTag> unclosedTags = new ArrayDeque<>();
464         HtmlTag lastOpenTag = htmlStack.pop();
465         while (!token.equalsIgnoreCase(lastOpenTag.getId())) {
466             // Find unclosed elements. Put them on a stack so the
467             // output order won't be back-to-front.
468             if (isSingleTag(lastOpenTag)) {
469                 lastOpenTag = htmlStack.pop();
470             }
471             else {
472                 unclosedTags.push(lastOpenTag);
473                 lastOpenTag = htmlStack.pop();
474             }
475         }
476 
477         // Output the unterminated tags, if any
478         // Skip multiples, like <b>..<b>
479         String lastFound = "";
480         for (final HtmlTag htag : unclosedTags) {
481             lastOpenTag = htag;
482             if (lastOpenTag.getId().equals(lastFound)) {
483                 continue;
484             }
485             lastFound = lastOpenTag.getId();
486             log(lastOpenTag.getLineNo(),
487                 lastOpenTag.getPosition(),
488                 MSG_UNCLOSED_HTML,
489                 lastOpenTag.getText());
490         }
491     }
492 
493     /**
494      * Determines if the HtmlTag is one which does not require a close tag.
495      *
496      * @param tag the HtmlTag to check.
497      * @return {@code true} if the HtmlTag is a single tag.
498      */
499     private static boolean isSingleTag(HtmlTag tag) {
500         // If it's a singleton tag (<p>, <br>, etc.), ignore it
501         // Can't simply not put them on the stack, since singletons
502         // like <dt> and <dd> (unhappily) may either be terminated
503         // or not terminated. Both options are legal.
504         return SINGLE_TAGS.contains(tag.getId().toLowerCase(Locale.ENGLISH));
505     }
506 
507     /**
508      * Determines if the HtmlTag is one which is allowed in a javadoc.
509      *
510      * @param tag the HtmlTag to check.
511      * @return {@code true} if the HtmlTag is an allowed html tag.
512      */
513     private static boolean isAllowedTag(HtmlTag tag) {
514         return ALLOWED_TAGS.contains(tag.getId().toLowerCase(Locale.ENGLISH));
515     }
516 
517     /**
518      * Determines if the given token is an extra HTML tag. This indicates that
519      * a close tag was found that does not have a corresponding open tag.
520      *
521      * @param token an HTML tag id for which a close was found.
522      * @param htmlStack a Stack of previous open HTML tags.
523      * @return {@code false} if a previous open tag was found
524      *         for the token.
525      */
526     private static boolean isExtraHtml(String token, Deque<HtmlTag> htmlStack) {
527         boolean isExtra = true;
528         for (final HtmlTag tag : htmlStack) {
529             // Loop, looking for tags that are closed.
530             // The loop is needed in case there are unclosed
531             // tags on the stack. In that case, the stack would
532             // not be empty, but this tag would still be extra.
533             if (token.equalsIgnoreCase(tag.getId())) {
534                 isExtra = false;
535                 break;
536             }
537         }
538 
539         return isExtra;
540     }
541 
542     /**
543      * Setter to specify the visibility scope where Javadoc comments are checked.
544      *
545      * @param scope a scope.
546      * @since 3.2
547      */
548     public void setScope(Scope scope) {
549         this.scope = scope;
550     }
551 
552     /**
553      * Setter to specify the visibility scope where Javadoc comments are not checked.
554      *
555      * @param excludeScope a scope.
556      * @since 3.4
557      */
558     public void setExcludeScope(Scope excludeScope) {
559         this.excludeScope = excludeScope;
560     }
561 
562     /**
563      * Setter to specify the format for matching the end of a sentence.
564      *
565      * @param pattern a pattern.
566      * @since 5.0
567      */
568     public void setEndOfSentenceFormat(Pattern pattern) {
569         endOfSentenceFormat = pattern;
570     }
571 
572     /**
573      * Setter to control whether to check the first sentence for proper end of sentence.
574      *
575      * @param flag {@code true} if the first sentence is to be checked
576      * @since 3.2
577      */
578     public void setCheckFirstSentence(boolean flag) {
579         checkFirstSentence = flag;
580     }
581 
582     /**
583      * Setter to control whether to check for incomplete HTML tags.
584      *
585      * @param flag {@code true} if HTML checking is to be performed.
586      * @since 3.2
587      */
588     public void setCheckHtml(boolean flag) {
589         checkHtml = flag;
590     }
591 
592     /**
593      * Setter to control whether to check if the Javadoc is missing a describing text.
594      *
595      * @param flag {@code true} if empty Javadoc checking should be done.
596      * @since 3.4
597      */
598     public void setCheckEmptyJavadoc(boolean flag) {
599         checkEmptyJavadoc = flag;
600     }
601 
602 }