View Javadoc
1   ///////////////////////////////////////////////////////////////////////////////////////////////
2   // checkstyle: Checks Java source code and other text files for adherence to a set of rules.
3   // Copyright (C) 2001-2025 the original author or authors.
4   //
5   // This library is free software; you can redistribute it and/or
6   // modify it under the terms of the GNU Lesser General Public
7   // License as published by the Free Software Foundation; either
8   // version 2.1 of the License, or (at your option) any later version.
9   //
10  // This library is distributed in the hope that it will be useful,
11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  // Lesser General Public License for more details.
14  //
15  // You should have received a copy of the GNU Lesser General Public
16  // License along with this library; if not, write to the Free Software
17  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  ///////////////////////////////////////////////////////////////////////////////////////////////
19  
20  package com.puppycrawl.tools.checkstyle.internal;
21  
22  import static com.google.common.truth.Truth.assertWithMessage;
23  
24  import java.io.IOException;
25  import java.nio.file.Files;
26  import java.nio.file.Path;
27  import java.util.ArrayList;
28  import java.util.HashMap;
29  import java.util.List;
30  import java.util.Locale;
31  import java.util.Map;
32  import java.util.Optional;
33  import java.util.Set;
34  import java.util.stream.Stream;
35  
36  import javax.xml.parsers.ParserConfigurationException;
37  
38  import org.junit.jupiter.api.Test;
39  import org.w3c.dom.Document;
40  import org.w3c.dom.Element;
41  import org.w3c.dom.Node;
42  import org.w3c.dom.NodeList;
43  
44  import com.puppycrawl.tools.checkstyle.AbstractModuleTestSupport;
45  import com.puppycrawl.tools.checkstyle.internal.utils.XmlUtil;
46  
47  /**
48   * JUnit test suite for validating the integrity and consistency of Checkstyle's XDoc
49   * category {@code index.xml} files.
50   * This test verifies that:
51   * <ul>
52   *   <li>All checks are accurately listed in their respective category index files.</li>
53   *   <li>Hyperlinks correctly point to the corresponding XDoc page and section for each check.</li>
54   *   <li>Index descriptions accurately reflect the main XDoc descriptions.</li>
55   * </ul>
56   *
57   * <p>
58   * Prerequisites for execution:
59   * <ul>
60   *   <li>{@code mvn clean compile}</li>
61   *   <li>{@code mvn plexus-component-metadata:generate-metadata}
62   *       (for custom macro/parser discovery)</li>
63   * </ul>
64   */
65  public class XdocsCategoryIndexTest extends AbstractModuleTestSupport {
66  
    /**
     * Relative path {@code src/site/xdoc/checks}; it is resolved against the working
     * directory and is the root that is walked when collecting check XDoc files.
     */
    private static final Path XDOC_CHECKS_DIR = Path.of("src", "site", "xdoc", "checks");
68  
    /**
     * Returns the package name {@code com.puppycrawl.tools.checkstyle.internal}.
     * NOTE(review): presumably the parent test support class uses this value to locate
     * test resources; confirm against {@code AbstractModuleTestSupport}.
     *
     * @return The dot-separated package name.
     */
    @Override
    protected String getPackageLocation() {
        return "com.puppycrawl.tools.checkstyle.internal";
    }
73  
74      @Test
75      public void testAllChecksListedInCategoryIndexAndDescriptionMatches() throws Exception {
76          final List<Path> checkXdocFiles = getCheckXdocFiles();
77  
78          for (final Path checkXdocFile : checkXdocFiles) {
79              final String mainSectionName = getMainSectionName(checkXdocFile);
80              final Path categoryDir = checkXdocFile.getParent();
81              final Path categoryIndexFile = categoryDir.resolve("index.xml");
82  
83              assertWithMessage("Category index file should exist for check: %s", checkXdocFile)
84                      .that(Files.exists(categoryIndexFile)).isTrue();
85  
86              final Map<String, CheckIndexInfo> indexedChecks = parseCategoryIndex(categoryIndexFile);
87              final Set<String> foundKeys = indexedChecks.keySet();
88  
89              final String checkNotFoundMsg = String.format(Locale.ROOT,
90                      "Check '%s' from %s not in %s. Found Checks: %s",
91                      mainSectionName, checkXdocFile.getFileName(), categoryIndexFile, foundKeys);
92              assertWithMessage(checkNotFoundMsg)
93                      .that(indexedChecks.containsKey(mainSectionName)).isTrue();
94  
95              final CheckIndexInfo checkInfoFromIndex = indexedChecks.get(mainSectionName);
96              final String internalErrorMsg = String.format(Locale.ROOT,
97                  "CheckInfo for '%s' null (key present). Test error.", mainSectionName);
98              assertWithMessage(internalErrorMsg)
99                      .that(checkInfoFromIndex)
100                     .isNotNull();
101 
102             // Validate Href
103             final String expectedHrefFileName = checkXdocFile.getFileName().toString()
104                     .replace(".xml", ".html");
105             final String expectedHref = expectedHrefFileName.toLowerCase(Locale.ROOT)
106                     + "#" + mainSectionName;
107             final String actualHref = checkInfoFromIndex.href();
108 
109             final String hrefMismatchMsg = String.format(Locale.ROOT,
110                     "Href mismatch for '%s' in %s." + "Expected: '%s', Found: '%s'",
111                     mainSectionName, categoryIndexFile, expectedHref, actualHref);
112             assertWithMessage(hrefMismatchMsg)
113                     .that(actualHref).isEqualTo(expectedHref);
114 
115             // Validate Description
116             final String descriptionFromXdoc = getCheckDescriptionFromXdoc(checkXdocFile);
117             final String descriptionFromIndex = checkInfoFromIndex.description();
118             final String normalizedIndexDesc = normalizeText(descriptionFromIndex);
119             final String normalizedXdocDesc = normalizeText(descriptionFromXdoc);
120 
121             final String descMismatchMsg = String.format(Locale.ROOT,
122                     "Check '%s' in index '%s': "
123                             + "index description is not a prefix of XDoc description.",
124                     mainSectionName, categoryIndexFile);
125             assertWithMessage(descMismatchMsg)
126                     .that(normalizedXdocDesc)
127                     .startsWith(normalizedIndexDesc);
128         }
129     }
130 
131     /**
132      * Scans the XDOC_CHECKS_DIR for all individual check XDoc files.
133      * It filters out common files like 'index.xml' and 'property_types.xml'.
134      *
135      * @return A list of paths to check XDoc files.
136      * @throws IOException if an I/O error occurs when walking the path.
137      */
138     private static List<Path> getCheckXdocFiles() throws IOException {
139         try (Stream<Path> paths = Files.walk(XDOC_CHECKS_DIR)) {
140             return paths
141                     .filter(Files::isRegularFile)
142                     .filter(path -> path.toString().endsWith(".xml"))
143                     .filter(path -> !"index.xml".equals(path.getFileName().toString()))
144                     .filter(path -> !"property_types.xml".equals(path.getFileName().toString()))
145                     .toList();
146         }
147     }
148 
149     /**
150      * Extracts the main section name from a check's XDoc file.
151      * This is typically the value of the 'name' attribute of the first &lt;section&gt; tag.
152      *
153      * @param checkXdocFile Path to the check's XDoc file.
154      * @return The main section name.
155      * @throws ParserConfigurationException if a DocumentBuilder cannot be created.
156      * @throws IOException if an I/O error occurs reading the file.
157      * @throws AssertionError if no &lt;section name=...&gt; is found.
158      */
159     private static String getMainSectionName(Path checkXdocFile)
160             throws ParserConfigurationException, IOException {
161         final String content = Files.readString(checkXdocFile);
162         final Document document = XmlUtil.getRawXml(checkXdocFile.toString(), content, content);
163         final NodeList sections = document.getElementsByTagName("section");
164 
165         for (int sectionIndex = 0; sectionIndex < sections.getLength(); sectionIndex++) {
166             final Node sectionNode = sections.item(sectionIndex);
167             if (sectionNode instanceof Element sectionElement
168                   && sectionElement.hasAttribute("name")) {
169                 return sectionElement.getAttribute("name");
170             }
171         }
172 
173         final String errorMsg = String.format(Locale.ROOT,
174                 "No <section name=...> found in %s", checkXdocFile);
175         throw new AssertionError(errorMsg);
176     }
177 
178     /**
179      * Extracts the description of a check from its XDoc file.
180      * It looks for a &lt;subsection name="Description"&gt; and then tries to find the content
181      * within a &lt;div&gt; or &lt;p&gt; tag.
182      * If not found, it aggregates direct text nodes of the subsection.
183      * As a last resort, it uses the full text content of the subsection.
184      *
185      * @param checkXdocFile Path to the check's XDoc file.
186      * @return The check's description text.
187      * @throws ParserConfigurationException if a DocumentBuilder cannot be created.
188      * @throws IOException if an I/O error occurs reading the file.
189      * @throws AssertionError if no suitable description subsection is found.
190      */
191     private static String getCheckDescriptionFromXdoc(Path checkXdocFile)
192             throws ParserConfigurationException, IOException {
193         final String content = Files.readString(checkXdocFile);
194         final Document document = XmlUtil.getRawXml(checkXdocFile.toString(), content, content);
195         final NodeList subsections = document.getElementsByTagName("subsection");
196 
197         for (int subsectionIdx = 0; subsectionIdx < subsections.getLength(); subsectionIdx++) {
198             final Node subsectionNode = subsections.item(subsectionIdx);
199             if (subsectionNode instanceof Element subsectionElement
200                 && "Description".equals(subsectionElement.getAttribute("name"))) {
201                 final Optional<String> description =
202                             getDescriptionFromSubsection(subsectionElement);
203                 if (description.isPresent()) {
204                     return description.get();
205                 }
206             }
207         }
208         final String errorMsg = String.format(Locale.ROOT,
209                 "No <subsection name=\"Description\"> with suitable content in %s",
210                 checkXdocFile);
211         throw new AssertionError(errorMsg);
212     }
213 
214     /**
215      * Extracts the description text from a given "Description" subsection element.
216      * It tries multiple strategies in order of preference:
217      * <ol>
218      *   <li>Text content of the first direct child {@code <div> }.</li>
219      *   <li>Text content of the first direct child {@code <p> }.</li>
220      *   <li>Aggregated direct text nodes of the subsection.</li>
221      *   <li>Full text content of the subsection.</li>
222      * </ol>
223      *
224      * @param subsectionElement The "Description" {@code <subsection> } DOM element.
225      * @return An {@link Optional} with the extracted description if found and non-blank,
226      *         otherwise {@link Optional#empty()}.
227      */
228     private static Optional<String> getDescriptionFromSubsection(Element subsectionElement) {
229         Optional<String> description = Optional.empty();
230         final Optional<String> textFromDiv = findTextInChildElements(subsectionElement, "div");
231         if (textFromDiv.isPresent()) {
232             description = textFromDiv;
233         }
234 
235         if (description.isEmpty()) {
236             final Optional<String> textFromP = findTextInChildElements(subsectionElement, "p");
237             if (textFromP.isPresent()) {
238                 description = textFromP;
239             }
240         }
241 
242         if (description.isEmpty()) {
243             final Optional<String> aggregatedText = getAggregatedDirectText(subsectionElement);
244             if (aggregatedText.isPresent()) {
245                 description = aggregatedText;
246             }
247         }
248 
249         if (description.isEmpty()) {
250             final String fullSubsectionText = subsectionElement.getTextContent();
251             if (fullSubsectionText != null && !fullSubsectionText.isBlank()) {
252                 description = Optional.of(fullSubsectionText);
253             }
254         }
255         return description;
256     }
257 
258     /**
259      * Finds the text content of the first non-blank direct child element with the given tag name.
260      *
261      * @param parent The parent DOM element.
262      * @param tagName The tag name to search for.
263      * @return An Optional containing the text if found, otherwise Optional.empty().
264      */
265     private static Optional<String> findTextInChildElements(Element parent, String tagName) {
266         Optional<String> foundText = Optional.empty();
267         for (final Element childElement : getChildrenElementsByTagName(parent, tagName)) {
268             final String text = childElement.getTextContent();
269             if (text != null && !text.isBlank()) {
270                 foundText = Optional.of(text);
271                 break;
272             }
273         }
274         return foundText;
275     }
276 
277     /**
278      * Aggregates text from all direct TEXT_NODE children of a parent element.
279      *
280      * @param parent The parent DOM element.
281      * @return An Optional containing the aggregated non-blank text, otherwise Optional.empty().
282      */
283     private static Optional<String> getAggregatedDirectText(Element parent) {
284         final StringBuilder directTextContent = new StringBuilder(32);
285         final NodeList directChildren = parent.getChildNodes();
286         for (int childIdx = 0; childIdx < directChildren.getLength(); childIdx++) {
287             final Node directChild = directChildren.item(childIdx);
288             if (directChild.getNodeType() == Node.TEXT_NODE) {
289                 directTextContent.append(directChild.getNodeValue());
290             }
291         }
292         final String aggregatedText = directTextContent.toString();
293         Optional<String> result = Optional.empty();
294         if (!aggregatedText.isBlank()) {
295             result = Optional.of(aggregatedText);
296         }
297         return result;
298     }
299 
300     /**
301      * Parses a category index.xml file to extract information about the checks listed.
302      * It iterates through all tables and their rows to find check names, hrefs, and descriptions.
303      *
304      * @param categoryIndexFile Path to the category's index.xml file.
305      * @return A map with check names (from &lt;a&gt; tag text) as keys
306      *         and {@link CheckIndexInfo} objects as values.
307      * @throws ParserConfigurationException if a DocumentBuilder cannot be created.
308      * @throws IOException if an I/O error occurs reading the file.
309      * @throws AssertionError if no &lt;table&gt; is found in the index file.
310      */
311     private static Map<String, CheckIndexInfo> parseCategoryIndex(Path categoryIndexFile)
312             throws ParserConfigurationException, IOException {
313         final Map<String, CheckIndexInfo> indexedChecks = new HashMap<>();
314         final String content = Files.readString(categoryIndexFile);
315         final Document document = XmlUtil.getRawXml(categoryIndexFile.toString(), content, content);
316         final NodeList tableNodes = document.getElementsByTagName("table");
317 
318         if (tableNodes.getLength() == 0) {
319             final String errorMsg = String.format(Locale.ROOT,
320                 "No <table> found in %s", categoryIndexFile);
321             throw new AssertionError(errorMsg);
322         }
323 
324         for (int tableIdx = 0; tableIdx < tableNodes.getLength(); tableIdx++) {
325             final Node tableNode = tableNodes.item(tableIdx);
326             if (tableNode instanceof Element element) {
327                 processTableElement(element, indexedChecks);
328             }
329         }
330         return indexedChecks;
331     }
332 
333     /**
334      * Processes a single &lt;table&gt; element from a category index file.
335      * Iterates over its rows, skipping a potential header row, and processes data rows.
336      *
337      * @param tableElement The &lt;table&gt; DOM element.
338      * @param indexedChecks The map to populate with check information.
339      */
340     private static void processTableElement(Element tableElement,
341                                             Map<String, CheckIndexInfo> indexedChecks) {
342         final List<Element> rowElements = getChildrenElementsByTagName(tableElement, "tr");
343         boolean isFirstRowInTable = true;
344 
345         for (final Element rowElement : rowElements) {
346             if (isFirstRowInTable) {
347                 isFirstRowInTable = false;
348                 if (isHeaderRow(rowElement)) {
349                     continue;
350                 }
351             }
352             processDataRow(rowElement, indexedChecks);
353         }
354     }
355 
356     /**
357      * Checks if a given table row element is a header row (i.e., contains &lt;th&gt; elements).
358      *
359      * @param rowElement The &lt;tr&gt; DOM element.
360      * @return True if it's a header row, false otherwise.
361      */
362     private static boolean isHeaderRow(Element rowElement) {
363         return !getChildrenElementsByTagName(rowElement, "th").isEmpty();
364     }
365 
366     /**
367      * Processes a data row (&lt;tr&gt; with &lt;td&gt; children) from a category index table.
368      * Extracts the check name, href, and description.
369      *
370      * @param rowElement The &lt;tr&gt; DOM element representing a data row.
371      * @param indexedChecks The map to populate with check information.
372      */
373     private static void processDataRow(Element rowElement,
374                                        Map<String, CheckIndexInfo> indexedChecks) {
375         final List<Element> cellElements = getChildrenElementsByTagName(rowElement, "td");
376         if (cellElements.size() >= 2) {
377             final Element nameCell = cellElements.get(0);
378             final Element descCell = cellElements.get(1);
379 
380             getFirstChildElementByTagName(nameCell, "a").ifPresent(anchorElement -> {
381                 if (anchorElement.hasAttribute("href")) {
382                     final String checkNameInIndex = anchorElement.getTextContent().trim();
383                     final String href = anchorElement.getAttribute("href");
384                     final String description = descCell.getTextContent();
385                     indexedChecks.put(checkNameInIndex,
386                             new CheckIndexInfo(href, description));
387                 }
388             });
389         }
390     }
391 
392     /**
393      * Retrieves all child elements of a given parent node that match the specified tag name.
394      *
395      * @param parent The parent DOM node.
396      * @param tagName The tag name to filter child elements by.
397      * @return A list of matching child elements. Empty if parent is null or no matches.
398      */
399     private static List<Element> getChildrenElementsByTagName(Node parent, String tagName) {
400         final List<Element> elements = new ArrayList<>();
401         if (parent != null) {
402             final NodeList children = parent.getChildNodes();
403             for (int childIdx = 0; childIdx < children.getLength(); childIdx++) {
404                 final Node child = children.item(childIdx);
405                 if (child instanceof Element element && tagName.equals(child.getNodeName())) {
406                     elements.add(element);
407                 }
408             }
409         }
410         return elements;
411     }
412 
413     /**
414      * Retrieves the first child element of a given parent node that matches the specified tag name.
415      *
416      * @param parent The parent DOM node.
417      * @param tagName The tag name to filter child elements by.
418      * @return An {@link Optional} with the first matching child element,
419      *         or empty {@link Optional} if none found or parent is null.
420      */
421     private static Optional<Element> getFirstChildElementByTagName(Node parent, String tagName) {
422         Optional<Element> result = Optional.empty();
423         if (parent != null) {
424             final NodeList children = parent.getChildNodes();
425             for (int childIdx = 0; childIdx < children.getLength(); childIdx++) {
426                 final Node child = children.item(childIdx);
427                 if (child instanceof Element element && tagName.equals(child.getNodeName())) {
428                     result = Optional.of(element);
429                     break;
430                 }
431             }
432         }
433         return result;
434     }
435 
436     /**
437      * Normalizes a string by trimming whitespace, replacing non-breaking spaces,
438      * and collapsing multiple internal whitespace characters into a single space.
439      *
440      * @param text The text to normalize.
441      * @return The normalized text, or an empty string if the input is null.
442      */
443     private static String normalizeText(String text) {
444         String normalized = "";
445         if (text != null) {
446             normalized = text.replace("\u00a0", " ").trim().replaceAll("\\s+", " ");
447         }
448         return normalized;
449     }
450 
451     /**
452      * Stores information about a check as parsed from an index.xml file.
453      * It holds the hyperlink reference (href) and the description text.
454      */
455     private static final class CheckIndexInfo {
456         private final String hrefValue;
457         private final String descriptionText;
458 
459         /**
460          * Constructs a new CheckIndexInfo instance.
461          *
462          * @param href The href attribute for the check's link.
463          * @param description The description text for the check.
464          */
465         /* package */ CheckIndexInfo(String href, String description) {
466             hrefValue = href;
467             descriptionText = description;
468         }
469 
470         /**
471          * Gets the href attribute for the check's link.
472          *
473          * @return The href string.
474          */
475         private String href() {
476             return hrefValue;
477         }
478 
479         /**
480          * Gets the description text for the check.
481          *
482          * @return The description string.
483          */
484         private String description() {
485             return descriptionText;
486         }
487     }
488 }