View Javadoc
///////////////////////////////////////////////////////////////////////////////////////////////
// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
// Copyright (C) 2001-2025 the original author or authors.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
///////////////////////////////////////////////////////////////////////////////////////////////
19  
20  package com.puppycrawl.tools.checkstyle.internal;
21  
22  import static com.google.common.truth.Truth.assertWithMessage;
23  
24  import java.io.IOException;
25  import java.nio.file.Files;
26  import java.nio.file.Path;
27  import java.nio.file.Paths;
28  import java.util.ArrayList;
29  import java.util.HashMap;
30  import java.util.List;
31  import java.util.Locale;
32  import java.util.Map;
33  import java.util.Optional;
34  import java.util.Set;
35  import java.util.stream.Stream;
36  
37  import javax.xml.parsers.ParserConfigurationException;
38  
39  import org.junit.jupiter.api.Test;
40  import org.w3c.dom.Document;
41  import org.w3c.dom.Element;
42  import org.w3c.dom.Node;
43  import org.w3c.dom.NodeList;
44  
45  import com.puppycrawl.tools.checkstyle.AbstractModuleTestSupport;
46  import com.puppycrawl.tools.checkstyle.internal.utils.XmlUtil;
47  
48  /**
49   * JUnit test suite for validating the integrity and consistency of Checkstyle's XDoc
50   * category {@code index.xml} files.
51   * This test verifies that:
52   * <ul>
53   *   <li>All checks are accurately listed in their respective category index files.</li>
54   *   <li>Hyperlinks correctly point to the corresponding XDoc page and section for each check.</li>
55   *   <li>Index descriptions accurately reflect the main XDoc descriptions.</li>
56   * </ul>
57   *
58   * <p>
59   * Prerequisites for execution:
60   * <ul>
61   *   <li>{@code mvn clean compile}</li>
62   *   <li>{@code mvn plexus-component-metadata:generate-metadata}
63   *       (for custom macro/parser discovery)</li>
64   * </ul>
65   */
66  public class XdocsCategoryIndexTest extends AbstractModuleTestSupport {
67  
68      private static final Path XDOC_CHECKS_DIR = Paths.get("src", "site", "xdoc", "checks");
69  
70      @Override
71      protected String getPackageLocation() {
72          return "com.puppycrawl.tools.checkstyle.internal";
73      }
74  
75      @Test
76      public void testAllChecksListedInCategoryIndexAndDescriptionMatches() throws Exception {
77          final List<Path> checkXdocFiles = getCheckXdocFiles();
78  
79          for (final Path checkXdocFile : checkXdocFiles) {
80              final String mainSectionName = getMainSectionName(checkXdocFile);
81              final Path categoryDir = checkXdocFile.getParent();
82              final Path categoryIndexFile = categoryDir.resolve("index.xml");
83  
84              assertWithMessage("Category index file should exist for check: %s", checkXdocFile)
85                      .that(Files.exists(categoryIndexFile)).isTrue();
86  
87              final Map<String, CheckIndexInfo> indexedChecks = parseCategoryIndex(categoryIndexFile);
88              final Set<String> foundKeys = indexedChecks.keySet();
89  
90              final String checkNotFoundFmt = "Check '%s' from %s not in %s. Found Checks: %s";
91              final String checkNotFoundMsg = String.format(Locale.ROOT,
92                      checkNotFoundFmt,
93                      mainSectionName, checkXdocFile.getFileName(), categoryIndexFile, foundKeys);
94              assertWithMessage(checkNotFoundMsg)
95                      .that(indexedChecks.containsKey(mainSectionName)).isTrue();
96  
97              final CheckIndexInfo checkInfoFromIndex = indexedChecks.get(mainSectionName);
98              final String internalErrorMsg = String.format(Locale.ROOT,
99                  "CheckInfo for '%s' null (key present). Test error.", mainSectionName);
100             assertWithMessage(internalErrorMsg)
101                     .that(checkInfoFromIndex)
102                     .isNotNull();
103 
104             // Validate Href
105             final String expectedHrefFileName = checkXdocFile.getFileName().toString()
106                     .replace(".xml", ".html");
107             final String expectedHref = expectedHrefFileName.toLowerCase(Locale.ROOT)
108                     + "#" + mainSectionName;
109             final String actualHref = checkInfoFromIndex.href();
110 
111             final String hrefMismatchFmt = "Href mismatch for '%s' in %s."
112                     + "Expected: '%s', Found: '%s'";
113             final String hrefMismatchMsg = String.format(Locale.ROOT,
114                     hrefMismatchFmt,
115                     mainSectionName, categoryIndexFile, expectedHref, actualHref);
116             assertWithMessage(hrefMismatchMsg)
117                     .that(actualHref).isEqualTo(expectedHref);
118 
119             // Validate Description
120             final String descriptionFromXdoc = getCheckDescriptionFromXdoc(checkXdocFile);
121             final String descriptionFromIndex = checkInfoFromIndex.description();
122             final String normalizedIndexDesc = normalizeText(descriptionFromIndex);
123             final String normalizedXdocDesc = normalizeText(descriptionFromXdoc);
124 
125             final String descMismatchFmt = "Check '%s' in index '%s': "
126                     + "index description is not a prefix of XDoc description.";
127             final String descMismatchMsg = String.format(Locale.ROOT,
128                     descMismatchFmt,
129                     mainSectionName, categoryIndexFile);
130             assertWithMessage(descMismatchMsg)
131                     .that(normalizedXdocDesc)
132                     .startsWith(normalizedIndexDesc);
133         }
134     }
135 
136     /**
137      * Scans the XDOC_CHECKS_DIR for all individual check XDoc files.
138      * It filters out common files like 'index.xml' and 'property_types.xml'.
139      *
140      * @return A list of paths to check XDoc files.
141      * @throws IOException if an I/O error occurs when walking the path.
142      */
143     private static List<Path> getCheckXdocFiles() throws IOException {
144         try (Stream<Path> paths = Files.walk(XDOC_CHECKS_DIR)) {
145             return paths
146                     .filter(Files::isRegularFile)
147                     .filter(path -> path.toString().endsWith(".xml"))
148                     .filter(path -> !"index.xml".equals(path.getFileName().toString()))
149                     .filter(path -> !"property_types.xml".equals(path.getFileName().toString()))
150                     .toList();
151         }
152     }
153 
154     /**
155      * Extracts the main section name from a check's XDoc file.
156      * This is typically the value of the 'name' attribute of the first &lt;section&gt; tag.
157      *
158      * @param checkXdocFile Path to the check's XDoc file.
159      * @return The main section name.
160      * @throws ParserConfigurationException if a DocumentBuilder cannot be created.
161      * @throws IOException if an I/O error occurs reading the file.
162      * @throws AssertionError if no &lt;section name=...&gt; is found.
163      */
164     private static String getMainSectionName(Path checkXdocFile)
165             throws ParserConfigurationException, IOException {
166         final String content = Files.readString(checkXdocFile);
167         final Document document = XmlUtil.getRawXml(checkXdocFile.toString(), content, content);
168         final NodeList sections = document.getElementsByTagName("section");
169 
170         for (int sectionIndex = 0; sectionIndex < sections.getLength(); sectionIndex++) {
171             final Node sectionNode = sections.item(sectionIndex);
172             if (sectionNode instanceof Element sectionElement
173                   && sectionElement.hasAttribute("name")) {
174                 return sectionElement.getAttribute("name");
175             }
176         }
177         final String errorFormat = "No <section name=...> found in %s";
178         final String errorMsg = String.format(Locale.ROOT, errorFormat, checkXdocFile);
179         throw new AssertionError(errorMsg);
180     }
181 
182     /**
183      * Extracts the description of a check from its XDoc file.
184      * It looks for a &lt;subsection name="Description"&gt; and then tries to find the content
185      * within a &lt;div&gt; or &lt;p&gt; tag.
186      * If not found, it aggregates direct text nodes of the subsection.
187      * As a last resort, it uses the full text content of the subsection.
188      *
189      * @param checkXdocFile Path to the check's XDoc file.
190      * @return The check's description text.
191      * @throws ParserConfigurationException if a DocumentBuilder cannot be created.
192      * @throws IOException if an I/O error occurs reading the file.
193      * @throws AssertionError if no suitable description subsection is found.
194      */
195     private static String getCheckDescriptionFromXdoc(Path checkXdocFile)
196             throws ParserConfigurationException, IOException {
197         final String content = Files.readString(checkXdocFile);
198         final Document document = XmlUtil.getRawXml(checkXdocFile.toString(), content, content);
199         final NodeList subsections = document.getElementsByTagName("subsection");
200 
201         for (int subsectionIdx = 0; subsectionIdx < subsections.getLength(); subsectionIdx++) {
202             final Node subsectionNode = subsections.item(subsectionIdx);
203             if (subsectionNode instanceof Element subsectionElement
204                 && "Description".equals(subsectionElement.getAttribute("name"))) {
205                 final Optional<String> description =
206                             getDescriptionFromSubsection(subsectionElement);
207                 if (description.isPresent()) {
208                     return description.get();
209                 }
210             }
211         }
212         final String errorFormat =
213                 "No <subsection name=\"Description\"> with suitable content in %s";
214         final String errorMsg = String.format(Locale.ROOT, errorFormat, checkXdocFile);
215         throw new AssertionError(errorMsg);
216     }
217 
218     /**
219      * Extracts the description text from a given "Description" subsection element.
220      * It tries multiple strategies in order of preference:
221      * <ol>
222      *   <li>Text content of the first direct child {@code <div> }.</li>
223      *   <li>Text content of the first direct child {@code <p> }.</li>
224      *   <li>Aggregated direct text nodes of the subsection.</li>
225      *   <li>Full text content of the subsection.</li>
226      * </ol>
227      *
228      * @param subsectionElement The "Description" {@code <subsection> } DOM element.
229      * @return An {@link Optional} with the extracted description if found and non-blank,
230      *         otherwise {@link Optional#empty()}.
231      */
232     private static Optional<String> getDescriptionFromSubsection(Element subsectionElement) {
233         Optional<String> description = Optional.empty();
234         final Optional<String> textFromDiv = findTextInChildElements(subsectionElement, "div");
235         if (textFromDiv.isPresent()) {
236             description = textFromDiv;
237         }
238 
239         if (description.isEmpty()) {
240             final Optional<String> textFromP = findTextInChildElements(subsectionElement, "p");
241             if (textFromP.isPresent()) {
242                 description = textFromP;
243             }
244         }
245 
246         if (description.isEmpty()) {
247             final Optional<String> aggregatedText = getAggregatedDirectText(subsectionElement);
248             if (aggregatedText.isPresent()) {
249                 description = aggregatedText;
250             }
251         }
252 
253         if (description.isEmpty()) {
254             final String fullSubsectionText = subsectionElement.getTextContent();
255             if (fullSubsectionText != null && !fullSubsectionText.isBlank()) {
256                 description = Optional.of(fullSubsectionText);
257             }
258         }
259         return description;
260     }
261 
262     /**
263      * Finds the text content of the first non-blank direct child element with the given tag name.
264      *
265      * @param parent The parent DOM element.
266      * @param tagName The tag name to search for.
267      * @return An Optional containing the text if found, otherwise Optional.empty().
268      */
269     private static Optional<String> findTextInChildElements(Element parent, String tagName) {
270         Optional<String> foundText = Optional.empty();
271         for (final Element childElement : getChildrenElementsByTagName(parent, tagName)) {
272             final String text = childElement.getTextContent();
273             if (text != null && !text.isBlank()) {
274                 foundText = Optional.of(text);
275                 break;
276             }
277         }
278         return foundText;
279     }
280 
281     /**
282      * Aggregates text from all direct TEXT_NODE children of a parent element.
283      *
284      * @param parent The parent DOM element.
285      * @return An Optional containing the aggregated non-blank text, otherwise Optional.empty().
286      */
287     private static Optional<String> getAggregatedDirectText(Element parent) {
288         final StringBuilder directTextContent = new StringBuilder(32);
289         final NodeList directChildren = parent.getChildNodes();
290         for (int childIdx = 0; childIdx < directChildren.getLength(); childIdx++) {
291             final Node directChild = directChildren.item(childIdx);
292             if (directChild.getNodeType() == Node.TEXT_NODE) {
293                 directTextContent.append(directChild.getNodeValue());
294             }
295         }
296         final String aggregatedText = directTextContent.toString();
297         Optional<String> result = Optional.empty();
298         if (!aggregatedText.isBlank()) {
299             result = Optional.of(aggregatedText);
300         }
301         return result;
302     }
303 
304     /**
305      * Parses a category index.xml file to extract information about the checks listed.
306      * It iterates through all tables and their rows to find check names, hrefs, and descriptions.
307      *
308      * @param categoryIndexFile Path to the category's index.xml file.
309      * @return A map with check names (from &lt;a&gt; tag text) as keys
310      *         and {@link CheckIndexInfo} objects as values.
311      * @throws ParserConfigurationException if a DocumentBuilder cannot be created.
312      * @throws IOException if an I/O error occurs reading the file.
313      * @throws AssertionError if no &lt;table&gt; is found in the index file.
314      */
315     private static Map<String, CheckIndexInfo> parseCategoryIndex(Path categoryIndexFile)
316             throws ParserConfigurationException, IOException {
317         final Map<String, CheckIndexInfo> indexedChecks = new HashMap<>();
318         final String content = Files.readString(categoryIndexFile);
319         final Document document = XmlUtil.getRawXml(categoryIndexFile.toString(), content, content);
320         final NodeList tableNodes = document.getElementsByTagName("table");
321 
322         if (tableNodes.getLength() == 0) {
323             final String errorMsg = String.format(Locale.ROOT,
324                 "No <table> found in %s", categoryIndexFile);
325             throw new AssertionError(errorMsg);
326         }
327 
328         for (int tableIdx = 0; tableIdx < tableNodes.getLength(); tableIdx++) {
329             final Node tableNode = tableNodes.item(tableIdx);
330             if (tableNode instanceof Element) {
331                 processTableElement((Element) tableNode, indexedChecks);
332             }
333         }
334         return indexedChecks;
335     }
336 
337     /**
338      * Processes a single &lt;table&gt; element from a category index file.
339      * Iterates over its rows, skipping a potential header row, and processes data rows.
340      *
341      * @param tableElement The &lt;table&gt; DOM element.
342      * @param indexedChecks The map to populate with check information.
343      */
344     private static void processTableElement(Element tableElement,
345                                             Map<String, CheckIndexInfo> indexedChecks) {
346         final List<Element> rowElements = getChildrenElementsByTagName(tableElement, "tr");
347         boolean isFirstRowInTable = true;
348 
349         for (final Element rowElement : rowElements) {
350             if (isFirstRowInTable) {
351                 isFirstRowInTable = false;
352                 if (isHeaderRow(rowElement)) {
353                     continue;
354                 }
355             }
356             processDataRow(rowElement, indexedChecks);
357         }
358     }
359 
360     /**
361      * Checks if a given table row element is a header row (i.e., contains &lt;th&gt; elements).
362      *
363      * @param rowElement The &lt;tr&gt; DOM element.
364      * @return True if it's a header row, false otherwise.
365      */
366     private static boolean isHeaderRow(Element rowElement) {
367         return !getChildrenElementsByTagName(rowElement, "th").isEmpty();
368     }
369 
370     /**
371      * Processes a data row (&lt;tr&gt; with &lt;td&gt; children) from a category index table.
372      * Extracts the check name, href, and description.
373      *
374      * @param rowElement The &lt;tr&gt; DOM element representing a data row.
375      * @param indexedChecks The map to populate with check information.
376      */
377     private static void processDataRow(Element rowElement,
378                                        Map<String, CheckIndexInfo> indexedChecks) {
379         final List<Element> cellElements = getChildrenElementsByTagName(rowElement, "td");
380         if (cellElements.size() >= 2) {
381             final Element nameCell = cellElements.get(0);
382             final Element descCell = cellElements.get(1);
383 
384             getFirstChildElementByTagName(nameCell, "a").ifPresent(anchorElement -> {
385                 if (anchorElement.hasAttribute("href")) {
386                     final String checkNameInIndex = anchorElement.getTextContent().trim();
387                     final String href = anchorElement.getAttribute("href");
388                     final String description = descCell.getTextContent();
389                     indexedChecks.put(checkNameInIndex,
390                             new CheckIndexInfo(href, description));
391                 }
392             });
393         }
394     }
395 
396     /**
397      * Retrieves all child elements of a given parent node that match the specified tag name.
398      *
399      * @param parent The parent DOM node.
400      * @param tagName The tag name to filter child elements by.
401      * @return A list of matching child elements. Empty if parent is null or no matches.
402      */
403     private static List<Element> getChildrenElementsByTagName(Node parent, String tagName) {
404         final List<Element> elements = new ArrayList<>();
405         if (parent != null) {
406             final NodeList children = parent.getChildNodes();
407             for (int childIdx = 0; childIdx < children.getLength(); childIdx++) {
408                 final Node child = children.item(childIdx);
409                 if (child instanceof Element && tagName.equals(child.getNodeName())) {
410                     elements.add((Element) child);
411                 }
412             }
413         }
414         return elements;
415     }
416 
417     /**
418      * Retrieves the first child element of a given parent node that matches the specified tag name.
419      *
420      * @param parent The parent DOM node.
421      * @param tagName The tag name to filter child elements by.
422      * @return An {@link Optional} with the first matching child element,
423      *         or empty {@link Optional} if none found or parent is null.
424      */
425     private static Optional<Element> getFirstChildElementByTagName(Node parent, String tagName) {
426         Optional<Element> result = Optional.empty();
427         if (parent != null) {
428             final NodeList children = parent.getChildNodes();
429             for (int childIdx = 0; childIdx < children.getLength(); childIdx++) {
430                 final Node child = children.item(childIdx);
431                 if (child instanceof Element && tagName.equals(child.getNodeName())) {
432                     result = Optional.of((Element) child);
433                     break;
434                 }
435             }
436         }
437         return result;
438     }
439 
440     /**
441      * Normalizes a string by trimming whitespace, replacing non-breaking spaces,
442      * and collapsing multiple internal whitespace characters into a single space.
443      *
444      * @param text The text to normalize.
445      * @return The normalized text, or an empty string if the input is null.
446      */
447     private static String normalizeText(String text) {
448         String normalized = "";
449         if (text != null) {
450             normalized = text.replace("\u00a0", " ").trim().replaceAll("\\s+", " ");
451         }
452         return normalized;
453     }
454 
455     /**
456      * Stores information about a check as parsed from an index.xml file.
457      * It holds the hyperlink reference (href) and the description text.
458      */
459     private static final class CheckIndexInfo {
460         private final String hrefValue;
461         private final String descriptionText;
462 
463         /**
464          * Constructs a new CheckIndexInfo instance.
465          *
466          * @param href The href attribute for the check's link.
467          * @param description The description text for the check.
468          */
469         /* package */ CheckIndexInfo(String href, String description) {
470             hrefValue = href;
471             descriptionText = description;
472         }
473 
474         /**
475          * Gets the href attribute for the check's link.
476          *
477          * @return The href string.
478          */
479         public String href() {
480             return hrefValue;
481         }
482 
483         /**
484          * Gets the description text for the check.
485          *
486          * @return The description string.
487          */
488         public String description() {
489             return descriptionText;
490         }
491     }
492 }