///////////////////////////////////////////////////////////////////////////////////////////////
// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
// Copyright (C) 2001-2026 the original author or authors.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
///////////////////////////////////////////////////////////////////////////////////////////////

package com.puppycrawl.tools.checkstyle.checks.javadoc;

import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.puppycrawl.tools.checkstyle.JavadocDetailNodeParser;
import com.puppycrawl.tools.checkstyle.StatelessCheck;
import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
import com.puppycrawl.tools.checkstyle.api.DetailAST;
import com.puppycrawl.tools.checkstyle.api.FileContents;
import com.puppycrawl.tools.checkstyle.api.Scope;
import com.puppycrawl.tools.checkstyle.api.TextBlock;
import com.puppycrawl.tools.checkstyle.api.TokenTypes;
import com.puppycrawl.tools.checkstyle.utils.CheckUtil;
import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
import com.puppycrawl.tools.checkstyle.utils.ScopeUtil;

043/**
044 * <div>
045 * Validates Javadoc comments to help ensure they are well formed.
046 * </div>
047 *
048 * <p>
049 * The following checks are performed:
050 * </p>
051 * <ul>
052 * <li>
053 * Ensures the first sentence ends with proper punctuation
054 * (That is a period, question mark, or exclamation mark, by default).
055 * Note that this check is not applied to inline {@code @return} tags,
056 * because the Javadoc tools automatically appends a period to the end of the tag
057 * content.
058 * Javadoc automatically places the first sentence in the method summary
059 * table and index. Without proper punctuation the Javadoc may be malformed.
060 * All items eligible for the {@code {@inheritDoc}} tag are exempt from this
061 * requirement.
062 * </li>
063 * <li>
064 * Check text for Javadoc statements that do not have any description.
065 * This includes both completely empty Javadoc, and Javadoc with only tags
066 * such as {@code @param} and {@code @return}.
067 * </li>
068 * <li>
069 * Check text for incomplete HTML tags. Verifies that HTML tags have
070 * corresponding end tags and issues an "Unclosed HTML tag found:" error if not.
071 * An "Extra HTML tag found:" error is issued if an end tag is found without
072 * a previous open tag.
073 * </li>
074 * <li>
075 * Check that a package Javadoc comment is well-formed (as described above).
076 * </li>
077 * <li>
078 * Check for allowed HTML tags. The list of allowed HTML tags is
079 * "a", "abbr", "acronym", "address", "area", "b", "bdo", "big", "blockquote",
080 * "br", "caption", "cite", "code", "colgroup", "dd", "del", "dfn", "div", "dl",
081 * "dt", "em", "fieldset", "font", "h1", "h2", "h3", "h4", "h5", "h6", "hr",
082 * "i", "img", "ins", "kbd", "li", "ol", "p", "pre", "q", "samp", "small",
083 * "span", "strong", "sub", "sup", "table", "tbody", "td", "tfoot", "th",
084 * "thead", "tr", "tt", "u", "ul", "var".
085 * </li>
086 * </ul>
087 *
088 * <p>
089 * These checks were patterned after the checks made by the
090 * <a href="https://maven-doccheck.sourceforge.net">DocCheck</a> doclet
091 * available from Sun. Note: Original Sun's DocCheck tool does not exist anymore.
092 * </p>
093 *
094 * @since 3.2
095 */
096@StatelessCheck
097public class JavadocStyleCheck
098    extends AbstractCheck {
099
100    /** Message property key for the Empty Javadoc message. */
101    public static final String MSG_EMPTY = "javadoc.empty";
102
103    /** Message property key for the No Javadoc end of Sentence Period message. */
104    public static final String MSG_NO_PERIOD = "javadoc.noPeriod";
105
106    /** Message property key for the Incomplete Tag message. */
107    public static final String MSG_INCOMPLETE_TAG = "javadoc.incompleteTag";
108
109    /** Message property key for the Unclosed HTML message. */
110    public static final String MSG_UNCLOSED_HTML = JavadocDetailNodeParser.MSG_UNCLOSED_HTML_TAG;
111
112    /** Message property key for the Extra HTML message. */
113    public static final String MSG_EXTRA_HTML = "javadoc.extraHtml";
114
115    /** HTML tags that do not require a close tag. */
116    private static final Set<String> SINGLE_TAGS = Set.of(
117        "br", "li", "dt", "dd", "hr", "img", "p", "td", "tr", "th"
118    );
119
120    /**
121     * HTML tags that are allowed in java docs.
122     * From <a href="https://www.w3schools.com/tags/default.asp">w3schools</a>:
123     * <br>
124     * The forms and structure tags are not allowed
125     */
126    private static final Set<String> ALLOWED_TAGS = Set.of(
127        "a", "abbr", "acronym", "address", "area", "b", "bdo", "big",
128        "blockquote", "br", "caption", "cite", "code", "colgroup", "dd",
129        "del", "dfn", "div", "dl", "dt", "em", "fieldset", "font", "h1",
130        "h2", "h3", "h4", "h5", "h6", "hr", "i", "img", "ins", "kbd",
131        "li", "ol", "p", "pre", "q", "samp", "small", "span", "strong",
132        "sub", "sup", "table", "tbody", "td", "tfoot", "th", "thead",
133        "tr", "tt", "u", "ul", "var"
134    );
135
136    /** Specify the format for inline return Javadoc. */
137    private static final Pattern INLINE_RETURN_TAG_PATTERN =
138            Pattern.compile("\\{@return.*?}\\s*");
139
140    /** Specify the format for first word in javadoc. */
141    private static final Pattern SENTENCE_SEPARATOR = Pattern.compile("\\.(?=\\s|$)");
142
143    /** Specify the visibility scope where Javadoc comments are checked. */
144    private Scope scope = Scope.PRIVATE;
145
146    /** Specify the visibility scope where Javadoc comments are not checked. */
147    private Scope excludeScope;
148
149    /** Specify the format for matching the end of a sentence. */
150    private Pattern endOfSentenceFormat = Pattern.compile("([.?!][ \t\n\r\f<])|([.?!]$)");
151
152    /**
153     * Control whether to check the first sentence for proper end of sentence.
154     */
155    private boolean checkFirstSentence = true;
156
157    /**
158     * Control whether to check for incomplete HTML tags.
159     */
160    private boolean checkHtml = true;
161
162    /**
163     * Control whether to check if the Javadoc is missing a describing text.
164     */
165    private boolean checkEmptyJavadoc;
166
167    @Override
168    public int[] getDefaultTokens() {
169        return getAcceptableTokens();
170    }
171
172    @Override
173    public int[] getAcceptableTokens() {
174        return new int[] {
175            TokenTypes.ANNOTATION_DEF,
176            TokenTypes.ANNOTATION_FIELD_DEF,
177            TokenTypes.CLASS_DEF,
178            TokenTypes.CTOR_DEF,
179            TokenTypes.ENUM_CONSTANT_DEF,
180            TokenTypes.ENUM_DEF,
181            TokenTypes.INTERFACE_DEF,
182            TokenTypes.METHOD_DEF,
183            TokenTypes.PACKAGE_DEF,
184            TokenTypes.VARIABLE_DEF,
185            TokenTypes.RECORD_DEF,
186            TokenTypes.COMPACT_CTOR_DEF,
187        };
188    }
189
190    @Override
191    public int[] getRequiredTokens() {
192        return CommonUtil.EMPTY_INT_ARRAY;
193    }
194
195    // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
196    @Override
197    @SuppressWarnings("deprecation")
198    public void visitToken(DetailAST ast) {
199        if (shouldCheck(ast)) {
200            final FileContents contents = getFileContents();
201            // Need to start searching for the comment before the annotations
202            // that may exist. Even if annotations are not defined on the
203            // package, the ANNOTATIONS AST is defined.
204            final TextBlock textBlock =
205                contents.getJavadocBefore(ast.getFirstChild().getLineNo());
206
207            checkComment(ast, textBlock);
208        }
209    }
210
211    /**
212     * Whether we should check this node.
213     *
214     * @param ast a given node.
215     * @return whether we should check a given node.
216     */
217    private boolean shouldCheck(final DetailAST ast) {
218        boolean check = false;
219
220        if (ast.getType() == TokenTypes.PACKAGE_DEF) {
221            check = CheckUtil.isPackageInfo(getFilePath());
222        }
223        else if (!ScopeUtil.isInCodeBlock(ast)) {
224            final Scope customScope = ScopeUtil.getScope(ast);
225            check = ScopeUtil.getSurroundingScope(ast)
226                    .map(surroundingScope -> {
227                        return customScope.isIn(scope)
228                                && surroundingScope.isIn(scope)
229                                && (excludeScope == null || !customScope.isIn(excludeScope)
230                                        || !surroundingScope.isIn(excludeScope));
231                    })
232                    .orElse(Boolean.FALSE);
233        }
234        return check;
235    }
236
237    /**
238     * Performs the various checks against the Javadoc comment.
239     *
240     * @param ast the AST of the element being documented
241     * @param comment the source lines that make up the Javadoc comment.
242     *
243     * @see #checkFirstSentenceEnding(DetailAST, TextBlock)
244     * @see #checkHtmlTags(DetailAST, TextBlock)
245     */
246    private void checkComment(final DetailAST ast, final TextBlock comment) {
247        if (comment != null) {
248            if (checkFirstSentence) {
249                checkFirstSentenceEnding(ast, comment);
250            }
251
252            if (checkHtml) {
253                checkHtmlTags(ast, comment);
254            }
255
256            if (checkEmptyJavadoc) {
257                checkJavadocIsNotEmpty(comment);
258            }
259        }
260    }
261
262    /**
263     * Checks that the first sentence ends with proper punctuation.  This method
264     * uses a regular expression that checks for the presence of a period,
265     * question mark, or exclamation mark followed either by whitespace, an
266     * HTML element, or the end of string. This method ignores {_AT_inheritDoc}
267     * comments for TokenTypes that are valid for {_AT_inheritDoc}.
268     *
269     * @param ast the current node
270     * @param comment the source lines that make up the Javadoc comment.
271     */
272    private void checkFirstSentenceEnding(final DetailAST ast, TextBlock comment) {
273        final String commentText = getCommentText(comment.getText());
274        final boolean hasInLineReturnTag = Arrays.stream(SENTENCE_SEPARATOR.split(commentText))
275                .findFirst()
276                .map(INLINE_RETURN_TAG_PATTERN::matcher)
277                .filter(Matcher::find)
278                .isPresent();
279
280        if (!hasInLineReturnTag
281            && !commentText.isEmpty()
282            && !endOfSentenceFormat.matcher(commentText).find()
283            && !(commentText.startsWith("{@inheritDoc}")
284            && JavadocTagInfo.INHERIT_DOC.isValidOn(ast))) {
285            log(comment.getStartLineNo(), MSG_NO_PERIOD);
286        }
287    }
288
289    /**
290     * Checks that the Javadoc is not empty.
291     *
292     * @param comment the source lines that make up the Javadoc comment.
293     */
294    private void checkJavadocIsNotEmpty(TextBlock comment) {
295        final String commentText = getCommentText(comment.getText());
296
297        if (commentText.isEmpty()) {
298            log(comment.getStartLineNo(), MSG_EMPTY);
299        }
300    }
301
302    /**
303     * Returns the comment text from the Javadoc.
304     *
305     * @param comments the lines of Javadoc.
306     * @return a comment text String.
307     */
308    private static String getCommentText(String... comments) {
309        final StringBuilder builder = new StringBuilder(1024);
310        for (final String line : comments) {
311            final int textStart = findTextStart(line);
312
313            if (textStart != -1) {
314                if (line.charAt(textStart) == '@') {
315                    // we have found the tag section
316                    break;
317                }
318                builder.append(line.substring(textStart));
319                trimTail(builder);
320                builder.append('\n');
321            }
322        }
323
324        return builder.toString().trim();
325    }
326
327    /**
328     * Finds the index of the first non-whitespace character ignoring the
329     * Javadoc comment start and end strings (&#47;** and *&#47;) as well as any
330     * leading asterisk.
331     *
332     * @param line the Javadoc comment line of text to scan.
333     * @return the int index relative to 0 for the start of text
334     *         or -1 if not found.
335     */
336    private static int findTextStart(String line) {
337        int textStart = -1;
338        int index = 0;
339        while (index < line.length()) {
340            if (!Character.isWhitespace(line.charAt(index))) {
341                if (line.regionMatches(index, "/**", 0, "/**".length())
342                    || line.regionMatches(index, "*/", 0, 2)) {
343                    index++;
344                }
345                else if (line.charAt(index) != '*') {
346                    textStart = index;
347                    break;
348                }
349            }
350            index++;
351        }
352        return textStart;
353    }
354
355    /**
356     * Trims any trailing whitespace or the end of Javadoc comment string.
357     *
358     * @param builder the StringBuilder to trim.
359     */
360    private static void trimTail(StringBuilder builder) {
361        int index = builder.length() - 1;
362        while (true) {
363            if (Character.isWhitespace(builder.charAt(index))) {
364                builder.deleteCharAt(index);
365            }
366            else if (index > 0 && builder.charAt(index) == '/'
367                    && builder.charAt(index - 1) == '*') {
368                builder.deleteCharAt(index);
369                builder.deleteCharAt(index - 1);
370                index--;
371                while (builder.charAt(index - 1) == '*') {
372                    builder.deleteCharAt(index - 1);
373                    index--;
374                }
375            }
376            else {
377                break;
378            }
379            index--;
380        }
381    }
382
383    /**
384     * Checks the comment for HTML tags that do not have a corresponding close
385     * tag or a close tag that has no previous open tag.  This code was
386     * primarily copied from the DocCheck checkHtml method.
387     *
388     * @param ast the node with the Javadoc
389     * @param comment the {@code TextBlock} which represents
390     *                 the Javadoc comment.
391     * @noinspection MethodWithMultipleReturnPoints
392     * @noinspectionreason MethodWithMultipleReturnPoints - check and method are
393     *      too complex to break apart
394     */
395    // -@cs[ReturnCount] Too complex to break apart.
396    private void checkHtmlTags(final DetailAST ast, final TextBlock comment) {
397        final int lineNo = comment.getStartLineNo();
398        final Deque<HtmlTag> htmlStack = new ArrayDeque<>();
399        final String[] text = comment.getText();
400
401        final TagParser parser = new TagParser(text, lineNo);
402
403        while (parser.hasNextTag()) {
404            final HtmlTag tag = parser.nextTag();
405
406            if (tag.isIncompleteTag()) {
407                log(tag.getLineNo(), MSG_INCOMPLETE_TAG,
408                    text[tag.getLineNo() - lineNo]);
409                return;
410            }
411            if (tag.isClosedTag()) {
412                // do nothing
413                continue;
414            }
415            if (tag.isCloseTag()) {
416                // We have found a close tag.
417                if (isExtraHtml(tag.getId(), htmlStack)) {
418                    // No corresponding open tag was found on the stack.
419                    log(tag.getLineNo(),
420                        tag.getPosition(),
421                        MSG_EXTRA_HTML,
422                        tag.getText());
423                }
424                else {
425                    // See if there are any unclosed tags that were opened
426                    // after this one.
427                    checkUnclosedTags(htmlStack, tag.getId());
428                }
429            }
430            else {
431                // We only push html tags that are allowed
432                if (isAllowedTag(tag)) {
433                    htmlStack.push(tag);
434                }
435            }
436        }
437
438        // Identify any tags left on the stack.
439        // Skip multiples, like <b>...<b>
440        String lastFound = "";
441        final List<String> typeParameters = CheckUtil.getTypeParameterNames(ast);
442        for (final HtmlTag htmlTag : htmlStack) {
443            if (!isSingleTag(htmlTag)
444                && !htmlTag.getId().equals(lastFound)
445                && !typeParameters.contains(htmlTag.getId())) {
446                log(htmlTag.getLineNo(), htmlTag.getPosition(),
447                        MSG_UNCLOSED_HTML, htmlTag.getText());
448                lastFound = htmlTag.getId();
449            }
450        }
451    }
452
453    /**
454     * Checks to see if there are any unclosed tags on the stack.  The token
455     * represents a html tag that has been closed and has a corresponding open
456     * tag on the stack.  Any tags, except single tags, that were opened
457     * (pushed on the stack) after the token are missing a close.
458     *
459     * @param htmlStack the stack of opened HTML tags.
460     * @param token the current HTML tag name that has been closed.
461     */
462    private void checkUnclosedTags(Deque<HtmlTag> htmlStack, String token) {
463        final Deque<HtmlTag> unclosedTags = new ArrayDeque<>();
464        HtmlTag lastOpenTag = htmlStack.pop();
465        while (!token.equalsIgnoreCase(lastOpenTag.getId())) {
466            // Find unclosed elements. Put them on a stack so the
467            // output order won't be back-to-front.
468            if (isSingleTag(lastOpenTag)) {
469                lastOpenTag = htmlStack.pop();
470            }
471            else {
472                unclosedTags.push(lastOpenTag);
473                lastOpenTag = htmlStack.pop();
474            }
475        }
476
477        // Output the unterminated tags, if any
478        // Skip multiples, like <b>..<b>
479        String lastFound = "";
480        for (final HtmlTag htag : unclosedTags) {
481            lastOpenTag = htag;
482            if (lastOpenTag.getId().equals(lastFound)) {
483                continue;
484            }
485            lastFound = lastOpenTag.getId();
486            log(lastOpenTag.getLineNo(),
487                lastOpenTag.getPosition(),
488                MSG_UNCLOSED_HTML,
489                lastOpenTag.getText());
490        }
491    }
492
493    /**
494     * Determines if the HtmlTag is one which does not require a close tag.
495     *
496     * @param tag the HtmlTag to check.
497     * @return {@code true} if the HtmlTag is a single tag.
498     */
499    private static boolean isSingleTag(HtmlTag tag) {
500        // If it's a singleton tag (<p>, <br>, etc.), ignore it
501        // Can't simply not put them on the stack, since singletons
502        // like <dt> and <dd> (unhappily) may either be terminated
503        // or not terminated. Both options are legal.
504        return SINGLE_TAGS.contains(tag.getId().toLowerCase(Locale.ENGLISH));
505    }
506
507    /**
508     * Determines if the HtmlTag is one which is allowed in a javadoc.
509     *
510     * @param tag the HtmlTag to check.
511     * @return {@code true} if the HtmlTag is an allowed html tag.
512     */
513    private static boolean isAllowedTag(HtmlTag tag) {
514        return ALLOWED_TAGS.contains(tag.getId().toLowerCase(Locale.ENGLISH));
515    }
516
517    /**
518     * Determines if the given token is an extra HTML tag. This indicates that
519     * a close tag was found that does not have a corresponding open tag.
520     *
521     * @param token an HTML tag id for which a close was found.
522     * @param htmlStack a Stack of previous open HTML tags.
523     * @return {@code false} if a previous open tag was found
524     *         for the token.
525     */
526    private static boolean isExtraHtml(String token, Deque<HtmlTag> htmlStack) {
527        boolean isExtra = true;
528        for (final HtmlTag tag : htmlStack) {
529            // Loop, looking for tags that are closed.
530            // The loop is needed in case there are unclosed
531            // tags on the stack. In that case, the stack would
532            // not be empty, but this tag would still be extra.
533            if (token.equalsIgnoreCase(tag.getId())) {
534                isExtra = false;
535                break;
536            }
537        }
538
539        return isExtra;
540    }
541
542    /**
543     * Setter to specify the visibility scope where Javadoc comments are checked.
544     *
545     * @param scope a scope.
546     * @since 3.2
547     */
548    public void setScope(Scope scope) {
549        this.scope = scope;
550    }
551
552    /**
553     * Setter to specify the visibility scope where Javadoc comments are not checked.
554     *
555     * @param excludeScope a scope.
556     * @since 3.4
557     */
558    public void setExcludeScope(Scope excludeScope) {
559        this.excludeScope = excludeScope;
560    }
561
562    /**
563     * Setter to specify the format for matching the end of a sentence.
564     *
565     * @param pattern a pattern.
566     * @since 5.0
567     */
568    public void setEndOfSentenceFormat(Pattern pattern) {
569        endOfSentenceFormat = pattern;
570    }
571
572    /**
573     * Setter to control whether to check the first sentence for proper end of sentence.
574     *
575     * @param flag {@code true} if the first sentence is to be checked
576     * @since 3.2
577     */
578    public void setCheckFirstSentence(boolean flag) {
579        checkFirstSentence = flag;
580    }
581
582    /**
583     * Setter to control whether to check for incomplete HTML tags.
584     *
585     * @param flag {@code true} if HTML checking is to be performed.
586     * @since 3.2
587     */
588    public void setCheckHtml(boolean flag) {
589        checkHtml = flag;
590    }
591
592    /**
593     * Setter to control whether to check if the Javadoc is missing a describing text.
594     *
595     * @param flag {@code true} if empty Javadoc checking should be done.
596     * @since 3.4
597     */
598    public void setCheckEmptyJavadoc(boolean flag) {
599        checkEmptyJavadoc = flag;
600    }
601
602}