1 ///////////////////////////////////////////////////////////////////////////////////////////////
2 // checkstyle: Checks Java source code and other text files for adherence to a set of rules.
3 // Copyright (C) 2001-2026 the original author or authors.
4 //
5 // This library is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU Lesser General Public
7 // License as published by the Free Software Foundation; either
8 // version 2.1 of the License, or (at your option) any later version.
9 //
10 // This library is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public
16 // License along with this library; if not, write to the Free Software
17 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 ///////////////////////////////////////////////////////////////////////////////////////////////
19
20 package com.puppycrawl.tools.checkstyle.meta;
21
22 import java.util.Optional;
23 import java.util.regex.Matcher;
24 import java.util.regex.Pattern;
25
26 import com.puppycrawl.tools.checkstyle.api.DetailNode;
27 import com.puppycrawl.tools.checkstyle.api.JavadocCommentsTokenTypes;
28 import com.puppycrawl.tools.checkstyle.utils.JavadocUtil;
29
30 /**
31 * Class for scraping module metadata from the corresponding class' class-level javadoc.
32 */
33 public final class JavadocMetadataScraperUtil {
34
    /**
     * Regular expression for detecting ANTLR token names (e.g. {@code CLASS_DEF}):
     * one or more runs of at least two upper-case ASCII letters or underscores.
     */
    private static final Pattern TOKEN_TEXT_PATTERN = Pattern.compile("([A-Z_]{2,})+");
37
    /**
     * Private utility constructor; keeps code outside this class from
     * creating instances.
     */
    private JavadocMetadataScraperUtil() {
    }
43
44 /**
45 * Performs a depth-first traversal of the subtree starting at {@code startNode}
46 * and ending at {@code endNode}, and constructs the concatenated text of all nodes
47 * in that range, ignoring {@code JavadocToken} texts.
48 *
49 * @param startNode the node where traversal begins (inclusive)
50 * @param endNode the node where traversal ends (inclusive)
51 * @return the constructed text from the specified subtree range
52 */
53 public static String constructSubTreeText(DetailNode startNode,
54 DetailNode endNode) {
55 DetailNode curNode = startNode;
56 final StringBuilder result = new StringBuilder(1024);
57
58 while (curNode != null) {
59 if (isContentToWrite(curNode)) {
60 String childText = curNode.getText();
61
62 if (isInsideCodeInlineTag(curNode)) {
63 childText = adjustCodeInlineTagChildToHtml(curNode);
64 }
65 else if (isInsideLiteralInlineTag(curNode)) {
66 childText = adjustLiteralInlineTagChildToText(curNode);
67 }
68
69 result.append(childText);
70 }
71
72 DetailNode toVisit = curNode.getFirstChild();
73 while (curNode != endNode && toVisit == null) {
74 toVisit = curNode.getNextSibling();
75 curNode = curNode.getParent();
76 }
77
78 curNode = toVisit;
79 }
80 return result.toString().trim();
81 }
82
83 /**
84 * Checks whether the given node is inside a {@code @code} Javadoc inline tag.
85 *
86 * @param node the node to check
87 * @return true if the node is inside a {@code @code} inline tag, false otherwise
88 */
89 private static boolean isInsideCodeInlineTag(DetailNode node) {
90 return node.getParent() != null
91 && node.getParent().getType() == JavadocCommentsTokenTypes.CODE_INLINE_TAG;
92 }
93
94 /**
95 * Checks whether the given node is inside a {@code @literal} Javadoc inline tag.
96 *
97 * @param node the node to check
98 * @return true if the node is inside a {@code @literal} inline tag, false otherwise
99 */
100 private static boolean isInsideLiteralInlineTag(DetailNode node) {
101 return node.getParent() != null
102 && node.getParent().getType() == JavadocCommentsTokenTypes.LITERAL_INLINE_TAG;
103 }
104
105 /**
106 * Checks whether selected Javadoc node is considered as something to write.
107 *
108 * @param detailNode javadoc node to check.
109 * @return whether javadoc node is something to write.
110 */
111 private static boolean isContentToWrite(DetailNode detailNode) {
112
113 return detailNode.getType() != JavadocCommentsTokenTypes.LEADING_ASTERISK
114 && (detailNode.getType() == JavadocCommentsTokenTypes.TEXT
115 || !TOKEN_TEXT_PATTERN.matcher(detailNode.getText()).matches());
116 }
117
118 /**
119 * Adjusts certain child of {@code @code} Javadoc inline tag to its analogous html format.
120 *
121 * @param codeChild {@code @code} child to convert.
122 * @return converted {@code @code} child element, otherwise just the original text.
123 */
124 public static String adjustCodeInlineTagChildToHtml(DetailNode codeChild) {
125
126 return switch (codeChild.getType()) {
127 case JavadocCommentsTokenTypes.JAVADOC_INLINE_TAG_END -> "</code>";
128 case JavadocCommentsTokenTypes.TAG_NAME -> "";
129 case JavadocCommentsTokenTypes.JAVADOC_INLINE_TAG_START -> "<code>";
130 default -> escapeXmlChars(codeChild.getText().trim());
131 };
132 }
133
134 /**
135 * Adjusts a child of {@code @literal} Javadoc inline tag to its XML-escaped plain text form.
136 *
137 * @param literalChild child node of the {@code @literal} inline tag.
138 * @return escaped text for content nodes, or empty string for structural tokens.
139 */
140 public static String adjustLiteralInlineTagChildToText(DetailNode literalChild) {
141 return switch (literalChild.getType()) {
142 case JavadocCommentsTokenTypes.JAVADOC_INLINE_TAG_END,
143 JavadocCommentsTokenTypes.JAVADOC_INLINE_TAG_START,
144 JavadocCommentsTokenTypes.TAG_NAME -> "";
145 default -> escapeXmlChars(literalChild.getText().trim());
146 };
147 }
148
149 /**
150 * Escapes special XML characters in the given text.
151 *
152 * @param text the text to escape.
153 * @return text with XML special characters escaped.
154 */
155 private static String escapeXmlChars(String text) {
156 return text.replace("&", "&")
157 .replace("<", "<")
158 .replace(">", ">");
159 }
160
161 /**
162 * Returns the first child node of the given parent that matches the provided {@code tokenType}.
163 *
164 * @param node the parent node
165 * @param tokenType the token type to match
166 * @return an {@link Optional} containing the first matching child node,
167 * or an empty {@link Optional} if none is found
168 */
169 private static Optional<DetailNode> getFirstChildOfType(DetailNode node, int tokenType) {
170 return JavadocUtil.getAllNodesOfType(node, tokenType).stream().findFirst();
171 }
172
173 /**
174 * Checks whether the first child {@code JavadocTokenType.TEXT} node matches given pattern.
175 *
176 * @param ast parent javadoc node
177 * @param pattern pattern to match
178 * @return true if one of child text nodes matches pattern
179 */
180 public static boolean isChildNodeTextMatches(DetailNode ast, Pattern pattern) {
181 return getFirstChildOfType(ast, JavadocCommentsTokenTypes.TEXT)
182 .map(DetailNode::getText)
183 .map(pattern::matcher)
184 .map(Matcher::matches)
185 .orElse(Boolean.FALSE);
186 }
187 }