View Javadoc
1   ///////////////////////////////////////////////////////////////////////////////////////////////
2   // checkstyle: Checks Java source code and other text files for adherence to a set of rules.
3   // Copyright (C) 2001-2025 the original author or authors.
4   //
5   // This library is free software; you can redistribute it and/or
6   // modify it under the terms of the GNU Lesser General Public
7   // License as published by the Free Software Foundation; either
8   // version 2.1 of the License, or (at your option) any later version.
9   //
10  // This library is distributed in the hope that it will be useful,
11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  // Lesser General Public License for more details.
14  //
15  // You should have received a copy of the GNU Lesser General Public
16  // License along with this library; if not, write to the Free Software
17  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  ///////////////////////////////////////////////////////////////////////////////////////////////
19  
20  package com.puppycrawl.tools.checkstyle.internal;
21  
22  import static com.google.common.truth.Truth.assertWithMessage;
23  
24  import java.io.IOException;
25  import java.nio.file.Files;
26  import java.nio.file.Path;
27  import java.nio.file.Paths;
28  import java.util.ArrayList;
29  import java.util.HashMap;
30  import java.util.List;
31  import java.util.Locale;
32  import java.util.Map;
33  import java.util.Optional;
34  import java.util.Set;
35  import java.util.stream.Collectors;
36  import java.util.stream.Stream;
37  
38  import javax.xml.parsers.ParserConfigurationException;
39  
40  import org.junit.jupiter.api.Test;
41  import org.w3c.dom.Document;
42  import org.w3c.dom.Element;
43  import org.w3c.dom.Node;
44  import org.w3c.dom.NodeList;
45  
46  import com.puppycrawl.tools.checkstyle.AbstractModuleTestSupport;
47  import com.puppycrawl.tools.checkstyle.internal.utils.XmlUtil;
48  
49  /**
50   * JUnit test suite for validating the integrity and consistency of Checkstyle's XDoc
51   * category {@code index.xml} files.
52   * This test verifies that:
53   * <ul>
54   *   <li>All checks are accurately listed in their respective category index files.</li>
55   *   <li>Hyperlinks correctly point to the corresponding XDoc page and section for each check.</li>
56   *   <li>Index descriptions accurately reflect the main XDoc descriptions.</li>
57   * </ul>
58   *
59   * <p>
60   * Prerequisites for execution:
61   * <ul>
62   *   <li>{@code mvn clean compile}</li>
63   *   <li>{@code mvn plexus-component-metadata:generate-metadata}
64   *       (for custom macro/parser discovery)</li>
65   * </ul>
66   */
67  public class XdocsCategoryIndexTest extends AbstractModuleTestSupport {
68  
69      private static final Path XDOC_CHECKS_DIR = Paths.get("src", "site", "xdoc", "checks");
70  
71      @Override
72      protected String getPackageLocation() {
73          return "com.puppycrawl.tools.checkstyle.internal";
74      }
75  
76      @Test
77      public void testAllChecksListedInCategoryIndexAndDescriptionMatches() throws Exception {
78          final List<Path> checkXdocFiles = getCheckXdocFiles();
79  
80          for (final Path checkXdocFile : checkXdocFiles) {
81              final String mainSectionName = getMainSectionName(checkXdocFile);
82              final Path categoryDir = checkXdocFile.getParent();
83              final Path categoryIndexFile = categoryDir.resolve("index.xml");
84  
85              assertWithMessage("Category index file should exist for check: %s", checkXdocFile)
86                      .that(Files.exists(categoryIndexFile)).isTrue();
87  
88              final Map<String, CheckIndexInfo> indexedChecks = parseCategoryIndex(categoryIndexFile);
89              final Set<String> foundKeys = indexedChecks.keySet();
90  
91              final String checkNotFoundFmt = "Check '%s' from %s not in %s. Found Checks: %s";
92              final String checkNotFoundMsg = String.format(Locale.ROOT,
93                      checkNotFoundFmt,
94                      mainSectionName, checkXdocFile.getFileName(), categoryIndexFile, foundKeys);
95              assertWithMessage(checkNotFoundMsg)
96                      .that(indexedChecks.containsKey(mainSectionName)).isTrue();
97  
98              final CheckIndexInfo checkInfoFromIndex = indexedChecks.get(mainSectionName);
99              final String internalErrorMsg = String.format(Locale.ROOT,
100                 "CheckInfo for '%s' null (key present). Test error.", mainSectionName);
101             assertWithMessage(internalErrorMsg)
102                     .that(checkInfoFromIndex)
103                     .isNotNull();
104 
105             // Validate Href
106             final String expectedHrefFileName = checkXdocFile.getFileName().toString()
107                     .replace(".xml", ".html");
108             final String expectedHref = expectedHrefFileName.toLowerCase(Locale.ROOT)
109                     + "#" + mainSectionName;
110             final String actualHref = checkInfoFromIndex.href();
111 
112             final String hrefMismatchFmt = "Href mismatch for '%s' in %s."
113                     + "Expected: '%s', Found: '%s'";
114             final String hrefMismatchMsg = String.format(Locale.ROOT,
115                     hrefMismatchFmt,
116                     mainSectionName, categoryIndexFile, expectedHref, actualHref);
117             assertWithMessage(hrefMismatchMsg)
118                     .that(actualHref).isEqualTo(expectedHref);
119 
120             // Validate Description
121             final String descriptionFromXdoc = getCheckDescriptionFromXdoc(checkXdocFile);
122             final String descriptionFromIndex = checkInfoFromIndex.description();
123             final String normalizedIndexDesc = normalizeText(descriptionFromIndex);
124             final String normalizedXdocDesc = normalizeText(descriptionFromXdoc);
125 
126             final String descMismatchFmt = "Check '%s' in index '%s': "
127                     + "index description is not a prefix of XDoc description.";
128             final String descMismatchMsg = String.format(Locale.ROOT,
129                     descMismatchFmt,
130                     mainSectionName, categoryIndexFile);
131             assertWithMessage(descMismatchMsg)
132                     .that(normalizedXdocDesc)
133                     .startsWith(normalizedIndexDesc);
134         }
135     }
136 
137     /**
138      * Scans the XDOC_CHECKS_DIR for all individual check XDoc files.
139      * It filters out common files like 'index.xml' and 'property_types.xml'.
140      *
141      * @return A list of paths to check XDoc files.
142      * @throws IOException if an I/O error occurs when walking the path.
143      */
144     private static List<Path> getCheckXdocFiles() throws IOException {
145         try (Stream<Path> paths = Files.walk(XDOC_CHECKS_DIR)) {
146             return paths
147                     .filter(Files::isRegularFile)
148                     .filter(path -> path.toString().endsWith(".xml"))
149                     .filter(path -> !"index.xml".equals(path.getFileName().toString()))
150                     .filter(path -> !"property_types.xml".equals(path.getFileName().toString()))
151                     .collect(Collectors.toUnmodifiableList());
152         }
153     }
154 
155     /**
156      * Extracts the main section name from a check's XDoc file.
157      * This is typically the value of the 'name' attribute of the first &lt;section&gt; tag.
158      *
159      * @param checkXdocFile Path to the check's XDoc file.
160      * @return The main section name.
161      * @throws ParserConfigurationException if a DocumentBuilder cannot be created.
162      * @throws IOException if an I/O error occurs reading the file.
163      * @throws AssertionError if no &lt;section name=...&gt; is found.
164      */
165     private static String getMainSectionName(Path checkXdocFile)
166             throws ParserConfigurationException, IOException {
167         final String content = Files.readString(checkXdocFile);
168         final Document document = XmlUtil.getRawXml(checkXdocFile.toString(), content, content);
169         final NodeList sections = document.getElementsByTagName("section");
170 
171         for (int sectionIndex = 0; sectionIndex < sections.getLength(); sectionIndex++) {
172             final Node sectionNode = sections.item(sectionIndex);
173             if (sectionNode instanceof Element) {
174                 final Element sectionElement = (Element) sectionNode;
175                 if (sectionElement.hasAttribute("name")) {
176                     return sectionElement.getAttribute("name");
177                 }
178             }
179         }
180         final String errorFormat = "No <section name=...> found in %s";
181         final String errorMsg = String.format(Locale.ROOT, errorFormat, checkXdocFile);
182         throw new AssertionError(errorMsg);
183     }
184 
185     /**
186      * Extracts the description of a check from its XDoc file.
187      * It looks for a &lt;subsection name="Description"&gt; and then tries to find the content
188      * within a &lt;div&gt; or &lt;p&gt; tag.
189      * If not found, it aggregates direct text nodes of the subsection.
190      * As a last resort, it uses the full text content of the subsection.
191      *
192      * @param checkXdocFile Path to the check's XDoc file.
193      * @return The check's description text.
194      * @throws ParserConfigurationException if a DocumentBuilder cannot be created.
195      * @throws IOException if an I/O error occurs reading the file.
196      * @throws AssertionError if no suitable description subsection is found.
197      */
198     private static String getCheckDescriptionFromXdoc(Path checkXdocFile)
199             throws ParserConfigurationException, IOException {
200         final String content = Files.readString(checkXdocFile);
201         final Document document = XmlUtil.getRawXml(checkXdocFile.toString(), content, content);
202         final NodeList subsections = document.getElementsByTagName("subsection");
203 
204         for (int subsectionIdx = 0; subsectionIdx < subsections.getLength(); subsectionIdx++) {
205             final Node subsectionNode = subsections.item(subsectionIdx);
206             if (subsectionNode instanceof Element) {
207                 final Element subsectionElement = (Element) subsectionNode;
208                 if ("Description".equals(subsectionElement.getAttribute("name"))) {
209                     final Optional<String> description =
210                             getDescriptionFromSubsection(subsectionElement);
211                     if (description.isPresent()) {
212                         return description.get();
213                     }
214                 }
215             }
216         }
217         final String errorFormat =
218                 "No <subsection name=\"Description\"> with suitable content in %s";
219         final String errorMsg = String.format(Locale.ROOT, errorFormat, checkXdocFile);
220         throw new AssertionError(errorMsg);
221     }
222 
223     /**
224      * Extracts the description text from a given "Description" subsection element.
225      * It tries multiple strategies in order of preference:
226      * <ol>
227      *   <li>Text content of the first direct child {@code <div> }.</li>
228      *   <li>Text content of the first direct child {@code <p> }.</li>
229      *   <li>Aggregated direct text nodes of the subsection.</li>
230      *   <li>Full text content of the subsection.</li>
231      * </ol>
232      *
233      * @param subsectionElement The "Description" {@code <subsection> } DOM element.
234      * @return An {@link Optional} with the extracted description if found and non-blank,
235      *         otherwise {@link Optional#empty()}.
236      */
237     private static Optional<String> getDescriptionFromSubsection(Element subsectionElement) {
238         Optional<String> description = Optional.empty();
239         final Optional<String> textFromDiv = findTextInChildElements(subsectionElement, "div");
240         if (textFromDiv.isPresent()) {
241             description = textFromDiv;
242         }
243 
244         if (description.isEmpty()) {
245             final Optional<String> textFromP = findTextInChildElements(subsectionElement, "p");
246             if (textFromP.isPresent()) {
247                 description = textFromP;
248             }
249         }
250 
251         if (description.isEmpty()) {
252             final Optional<String> aggregatedText = getAggregatedDirectText(subsectionElement);
253             if (aggregatedText.isPresent()) {
254                 description = aggregatedText;
255             }
256         }
257 
258         if (description.isEmpty()) {
259             final String fullSubsectionText = subsectionElement.getTextContent();
260             if (fullSubsectionText != null && !fullSubsectionText.isBlank()) {
261                 description = Optional.of(fullSubsectionText);
262             }
263         }
264         return description;
265     }
266 
267     /**
268      * Finds the text content of the first non-blank direct child element with the given tag name.
269      *
270      * @param parent The parent DOM element.
271      * @param tagName The tag name to search for.
272      * @return An Optional containing the text if found, otherwise Optional.empty().
273      */
274     private static Optional<String> findTextInChildElements(Element parent, String tagName) {
275         Optional<String> foundText = Optional.empty();
276         for (final Element childElement : getChildrenElementsByTagName(parent, tagName)) {
277             final String text = childElement.getTextContent();
278             if (text != null && !text.isBlank()) {
279                 foundText = Optional.of(text);
280                 break;
281             }
282         }
283         return foundText;
284     }
285 
286     /**
287      * Aggregates text from all direct TEXT_NODE children of a parent element.
288      *
289      * @param parent The parent DOM element.
290      * @return An Optional containing the aggregated non-blank text, otherwise Optional.empty().
291      */
292     private static Optional<String> getAggregatedDirectText(Element parent) {
293         final StringBuilder directTextContent = new StringBuilder(32);
294         final NodeList directChildren = parent.getChildNodes();
295         for (int childIdx = 0; childIdx < directChildren.getLength(); childIdx++) {
296             final Node directChild = directChildren.item(childIdx);
297             if (directChild.getNodeType() == Node.TEXT_NODE) {
298                 directTextContent.append(directChild.getNodeValue());
299             }
300         }
301         final String aggregatedText = directTextContent.toString();
302         Optional<String> result = Optional.empty();
303         if (!aggregatedText.isBlank()) {
304             result = Optional.of(aggregatedText);
305         }
306         return result;
307     }
308 
309     /**
310      * Parses a category index.xml file to extract information about the checks listed.
311      * It iterates through all tables and their rows to find check names, hrefs, and descriptions.
312      *
313      * @param categoryIndexFile Path to the category's index.xml file.
314      * @return A map with check names (from &lt;a&gt; tag text) as keys
315      *         and {@link CheckIndexInfo} objects as values.
316      * @throws ParserConfigurationException if a DocumentBuilder cannot be created.
317      * @throws IOException if an I/O error occurs reading the file.
318      * @throws AssertionError if no &lt;table&gt; is found in the index file.
319      */
320     private static Map<String, CheckIndexInfo> parseCategoryIndex(Path categoryIndexFile)
321             throws ParserConfigurationException, IOException {
322         final Map<String, CheckIndexInfo> indexedChecks = new HashMap<>();
323         final String content = Files.readString(categoryIndexFile);
324         final Document document = XmlUtil.getRawXml(categoryIndexFile.toString(), content, content);
325         final NodeList tableNodes = document.getElementsByTagName("table");
326 
327         if (tableNodes.getLength() == 0) {
328             final String errorMsg = String.format(Locale.ROOT,
329                 "No <table> found in %s", categoryIndexFile);
330             throw new AssertionError(errorMsg);
331         }
332 
333         for (int tableIdx = 0; tableIdx < tableNodes.getLength(); tableIdx++) {
334             final Node tableNode = tableNodes.item(tableIdx);
335             if (tableNode instanceof Element) {
336                 processTableElement((Element) tableNode, indexedChecks);
337             }
338         }
339         return indexedChecks;
340     }
341 
342     /**
343      * Processes a single &lt;table&gt; element from a category index file.
344      * Iterates over its rows, skipping a potential header row, and processes data rows.
345      *
346      * @param tableElement The &lt;table&gt; DOM element.
347      * @param indexedChecks The map to populate with check information.
348      */
349     private static void processTableElement(Element tableElement,
350                                             Map<String, CheckIndexInfo> indexedChecks) {
351         final List<Element> rowElements = getChildrenElementsByTagName(tableElement, "tr");
352         boolean isFirstRowInTable = true;
353 
354         for (final Element rowElement : rowElements) {
355             if (isFirstRowInTable) {
356                 isFirstRowInTable = false;
357                 if (isHeaderRow(rowElement)) {
358                     continue;
359                 }
360             }
361             processDataRow(rowElement, indexedChecks);
362         }
363     }
364 
365     /**
366      * Checks if a given table row element is a header row (i.e., contains &lt;th&gt; elements).
367      *
368      * @param rowElement The &lt;tr&gt; DOM element.
369      * @return True if it's a header row, false otherwise.
370      */
371     private static boolean isHeaderRow(Element rowElement) {
372         return !getChildrenElementsByTagName(rowElement, "th").isEmpty();
373     }
374 
375     /**
376      * Processes a data row (&lt;tr&gt; with &lt;td&gt; children) from a category index table.
377      * Extracts the check name, href, and description.
378      *
379      * @param rowElement The &lt;tr&gt; DOM element representing a data row.
380      * @param indexedChecks The map to populate with check information.
381      */
382     private static void processDataRow(Element rowElement,
383                                        Map<String, CheckIndexInfo> indexedChecks) {
384         final List<Element> cellElements = getChildrenElementsByTagName(rowElement, "td");
385         if (cellElements.size() >= 2) {
386             final Element nameCell = cellElements.get(0);
387             final Element descCell = cellElements.get(1);
388 
389             getFirstChildElementByTagName(nameCell, "a").ifPresent(anchorElement -> {
390                 if (anchorElement.hasAttribute("href")) {
391                     final String checkNameInIndex = anchorElement.getTextContent().trim();
392                     final String href = anchorElement.getAttribute("href");
393                     final String description = descCell.getTextContent();
394                     indexedChecks.put(checkNameInIndex,
395                             new CheckIndexInfo(href, description));
396                 }
397             });
398         }
399     }
400 
401     /**
402      * Retrieves all child elements of a given parent node that match the specified tag name.
403      *
404      * @param parent The parent DOM node.
405      * @param tagName The tag name to filter child elements by.
406      * @return A list of matching child elements. Empty if parent is null or no matches.
407      */
408     private static List<Element> getChildrenElementsByTagName(Node parent, String tagName) {
409         final List<Element> elements = new ArrayList<>();
410         if (parent != null) {
411             final NodeList children = parent.getChildNodes();
412             for (int childIdx = 0; childIdx < children.getLength(); childIdx++) {
413                 final Node child = children.item(childIdx);
414                 if (child instanceof Element && tagName.equals(child.getNodeName())) {
415                     elements.add((Element) child);
416                 }
417             }
418         }
419         return elements;
420     }
421 
422     /**
423      * Retrieves the first child element of a given parent node that matches the specified tag name.
424      *
425      * @param parent The parent DOM node.
426      * @param tagName The tag name to filter child elements by.
427      * @return An {@link Optional} with the first matching child element,
428      *         or empty {@link Optional} if none found or parent is null.
429      */
430     private static Optional<Element> getFirstChildElementByTagName(Node parent, String tagName) {
431         Optional<Element> result = Optional.empty();
432         if (parent != null) {
433             final NodeList children = parent.getChildNodes();
434             for (int childIdx = 0; childIdx < children.getLength(); childIdx++) {
435                 final Node child = children.item(childIdx);
436                 if (child instanceof Element && tagName.equals(child.getNodeName())) {
437                     result = Optional.of((Element) child);
438                     break;
439                 }
440             }
441         }
442         return result;
443     }
444 
445     /**
446      * Normalizes a string by trimming whitespace, replacing non-breaking spaces,
447      * and collapsing multiple internal whitespace characters into a single space.
448      *
449      * @param text The text to normalize.
450      * @return The normalized text, or an empty string if the input is null.
451      */
452     private static String normalizeText(String text) {
453         String normalized = "";
454         if (text != null) {
455             normalized = text.replace("\u00a0", " ").trim().replaceAll("\\s+", " ");
456         }
457         return normalized;
458     }
459 
460     /**
461      * Stores information about a check as parsed from an index.xml file.
462      * It holds the hyperlink reference (href) and the description text.
463      */
464     private static final class CheckIndexInfo {
465         private final String hrefValue;
466         private final String descriptionText;
467 
468         /**
469          * Constructs a new CheckIndexInfo instance.
470          *
471          * @param href The href attribute for the check's link.
472          * @param description The description text for the check.
473          */
474         /* package */ CheckIndexInfo(String href, String description) {
475             hrefValue = href;
476             descriptionText = description;
477         }
478 
479         /**
480          * Gets the href attribute for the check's link.
481          *
482          * @return The href string.
483          */
484         public String href() {
485             return hrefValue;
486         }
487 
488         /**
489          * Gets the description text for the check.
490          *
491          * @return The description string.
492          */
493         public String description() {
494             return descriptionText;
495         }
496     }
497 }