///////////////////////////////////////////////////////////////////////////////////////////////
// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
// Copyright (C) 2001-2025 the original author or authors.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
///////////////////////////////////////////////////////////////////////////////////////////////

package com.puppycrawl.tools.checkstyle.site;

import java.nio.file.Path;
import java.util.regex.Pattern;

import org.apache.maven.doxia.macro.AbstractMacro;
import org.apache.maven.doxia.macro.Macro;
import org.apache.maven.doxia.macro.MacroExecutionException;
import org.apache.maven.doxia.macro.MacroRequest;
import org.apache.maven.doxia.sink.Sink;
import org.codehaus.plexus.component.annotations.Component;

import com.puppycrawl.tools.checkstyle.api.DetailNode;
import com.puppycrawl.tools.checkstyle.utils.CommonUtil;

/**
 * A macro that inserts the first sentence (summary) of a Check module's Javadoc,
 * cleaned of HTML tags and links for safe inclusion in xdoc XML.
 *
 * <p>This class is used during site generation to dynamically populate
 * {@code checks.xml} content using module-level documentation.</p>
 */
@Component(role = Macro.class, hint = "checks")
public class ChecksXmlMacro extends AbstractMacro {

    /**
     * Pattern to remove structural HTML tags: div, p, span, em, strong.
     * The lookahead after the tag name requires whitespace, {@code /} or {@code >},
     * so that longer tag names sharing a prefix (for example {@code <pre>},
     * {@code <param>} or {@code <embed>}) are left untouched.
     */
    private static final Pattern STRUCTURAL_TAG_PATTERN =
            Pattern.compile("(?is)</?(?:div|p|span|em|strong)(?=[\\s/>])[^>]*>");

    /**
     * Pattern to remove opening and closing HTML anchor tags while keeping the
     * link text. The lookahead after the tag name keeps tags such as
     * {@code <abbr>} or {@code <article>} from being mistaken for anchors, which
     * would otherwise strip only their opening tag and leave unbalanced markup.
     */
    private static final Pattern ANCHOR_TAG_PATTERN =
            Pattern.compile("(?is)</?a(?=[\\s/>])[^>]*>");

    /** Pattern for collapsing multiple whitespace characters into one. */
    private static final Pattern SPACE_PATTERN = Pattern.compile("\\s+");

    /** Number of characters after which the summary text is wrapped. */
    private static final int WRAP_LIMIT = 70;

    /**
     * Writes the wrapped, sanitized first sentence of a module's Javadoc to the sink.
     *
     * @param sink the sink that receives the generated raw text
     * @param request the macro request; must carry a {@code modulePath} parameter
     * @throws MacroExecutionException if {@code modulePath} is missing or the
     *     module's Javadoc cannot be found
     */
    @Override
    public void execute(Sink sink, MacroRequest request) throws MacroExecutionException {
        final Object param = request.getParameter("modulePath");
        if (param == null) {
            throw new MacroExecutionException("Parameter 'modulePath' is required.");
        }

        // toString() instead of a blind cast: no ClassCastException if the
        // request hands over a non-String parameter value.
        final Path modulePath = Path.of(param.toString());
        final String moduleName = CommonUtil.getFileNameWithoutExtension(modulePath.toString());

        final DetailNode moduleJavadoc = SiteUtil.getModuleJavadoc(moduleName, modulePath);
        if (moduleJavadoc == null) {
            throw new MacroExecutionException(
                    "Javadoc of module " + moduleName + " not found.");
        }

        final String moduleDescription =
                ModuleJavadocParsingUtil.getModuleDescription(moduleJavadoc);
        // extractFirstSentence already returns trimmed text, no extra trim needed.
        final String summarySentence = extractFirstSentence(sanitize(moduleDescription));

        sink.rawText(wrapText(summarySentence, WRAP_LIMIT));
    }

    /**
     * Extracts the first sentence: everything up to and including the first period
     * that is at the end of the text or is followed by whitespace or {@code '<'}.
     * If no such period exists, the whole description is returned.
     *
     * @param description the full module description text, may be {@code null}
     * @return trimmed first sentence of description, or an empty string for
     *     {@code null} input
     */
    private static String extractFirstSentence(String description) {
        String result = "";
        if (description != null) {
            final int descriptionLength = description.length();
            int endIndex = -1;
            for (int index = 0; index < descriptionLength; index++) {
                if (description.charAt(index) == '.'
                        && (index == descriptionLength - 1
                            || Character.isWhitespace(description.charAt(index + 1))
                            || description.charAt(index + 1) == '<')) {
                    endIndex = index;
                    break;
                }
            }
            if (endIndex == -1) {
                result = description;
            }
            else {
                result = description.substring(0, endIndex + 1);
            }
        }
        return result.trim();
    }

    /**
     * Wraps text so that each chunk holds at most {@code wrapLimit} characters.
     * A chunk is ended at the last space found at or before {@code wrapLimit};
     * only when no such space exists is the text cut exactly at {@code wrapLimit}.
     * Every chunk after the first is prefixed with
     * {@code ModuleJavadocParsingUtil.INDENT_LEVEL_14}.
     *
     * @param text the text to wrap, may be {@code null}
     * @param wrapLimit maximum number of characters per chunk; must be positive
     * @return wrapped text, or an empty string for {@code null} or empty input
     * @throws IllegalArgumentException if {@code wrapLimit} is not positive and
     *     {@code text} is not empty
     */
    private static String wrapText(String text, int wrapLimit) {
        final String result;
        if (text == null || text.isEmpty()) {
            result = "";
        }
        else {
            if (wrapLimit < 1) {
                // A non-positive limit would never consume any text and loop forever.
                throw new IllegalArgumentException(
                        "wrapLimit must be positive, but was " + wrapLimit);
            }

            // NOTE(review): presumably a line break followed by indentation -
            // confirm against ModuleJavadocParsingUtil.
            final String continuationIndent = ModuleJavadocParsingUtil.INDENT_LEVEL_14;
            final StringBuilder wrapped = new StringBuilder(text.length() + 32);
            String remaining = text.trim();
            boolean isFirstLine = true;

            while (remaining.length() > wrapLimit) {
                // lastIndexOf never returns more than wrapLimit, and 'remaining' is
                // trimmed, so only a missing space needs the hard-break fallback.
                int breakIndex = remaining.lastIndexOf(' ', wrapLimit);
                if (breakIndex <= 0) {
                    breakIndex = wrapLimit;
                }

                if (!isFirstLine) {
                    wrapped.append(continuationIndent);
                }
                wrapped.append(remaining, 0, breakIndex);

                remaining = remaining.substring(breakIndex).trim();
                isFirstLine = false;
            }

            if (!isFirstLine) {
                wrapped.append(continuationIndent);
            }
            wrapped.append(remaining);
            result = wrapped.toString();
        }
        return result;
    }

    /**
     * Removes anchor tags and structural tags (div, p, span, em, strong) from the
     * given HTML, keeping their text content, then collapses every run of
     * whitespace into a single space. Other tags, such as {@code <code>}, are kept.
     *
     * @param html the HTML text to clean, may be {@code null}
     * @return trimmed, sanitized text, or an empty string for {@code null} or
     *     empty input
     */
    private static String sanitize(String html) {
        final String result;
        if (html == null || html.isEmpty()) {
            result = "";
        }
        else {
            String cleaned = ANCHOR_TAG_PATTERN.matcher(html).replaceAll("");
            cleaned = STRUCTURAL_TAG_PATTERN.matcher(cleaned).replaceAll("");
            cleaned = SPACE_PATTERN.matcher(cleaned).replaceAll(" ");
            result = cleaned.trim();
        }
        return result;
    }
}