///////////////////////////////////////////////////////////////////////////////////////////////
// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
// Copyright (C) 2001-2025 the original author or authors.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
///////////////////////////////////////////////////////////////////////////////////////////////

package com.puppycrawl.tools.checkstyle.checks.regexp;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
import com.puppycrawl.tools.checkstyle.api.DetailAST;
import com.puppycrawl.tools.checkstyle.api.FileContents;
import com.puppycrawl.tools.checkstyle.api.FileText;
import com.puppycrawl.tools.checkstyle.api.LineColumn;
import com.puppycrawl.tools.checkstyle.utils.CommonUtil;

/**
 * <div>
 * Checks that a specified pattern exists, exists less than
 * a set number of times, or does not exist in the file.
 * </div>
 *
 * <p>
 * This check combines all the functionality provided by
 * <a href="https://checkstyle.org/checks/header/regexpheader.html">RegexpHeader</a>
 * except supplying the regular expression from a file.
 * </p>
 *
 * <p>
 * It differs from them in that it works in multiline mode. Its regular expression
 * can span multiple lines and it checks this against the whole file at once.
 * The others work in single-line mode. Their single or multiple regular expressions
 * can only span one line. They check each of these against each line in the file in turn.
 * </p>
 *
 * <p>
 * <b>Note:</b> Because of the different mode of operation there may be some
 * changes in the regular expressions used to achieve a particular end.
 * </p>
 *
 * <p>
 * In multiline mode...
 * </p>
 * <ul>
 * <li>
 * {@code ^} means the beginning of a line, as opposed to beginning of the input.
 * </li>
 * <li>
 * For beginning of the input use {@code \A}.
 * </li>
 * <li>
 * {@code $} means the end of a line, as opposed to the end of the input.
 * </li>
 * <li>
 * For end of input use {@code \Z}.
 * </li>
 * <li>
 * Each line in the file is terminated with a line feed character.
 * </li>
 * </ul>
 *
 * <p>
 * <b>Note:</b> Not all regular expression engines are created equal.
 * Some provide extra functions that others do not and some elements
 * of the syntax may vary. This check makes use of the
 * <a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/util/regex/package-summary.html">
 * java.util.regex package</a>; please check its documentation for details
 * of how to construct a regular expression to achieve a particular goal.
 * </p>
 *
 * <p>
 * <b>Note:</b> When entering a regular expression as a parameter in
 * the XML config file you must also take into account the XML rules. e.g.
 * if you want to match a &lt; symbol you need to enter &amp;lt;.
 * The regular expression should be entered on one line.
 * </p>
 *
 * <p>
 * <b>Note:</b> To search for parentheses () in a regular expression
 * you must escape them like \(\). This is required by the regexp engine,
 * otherwise it will think they are special instruction characters.
 * </p>
 *
 * <p>
 * <b>Note:</b> To search for things that mean something in XML, like
 * &lt; you need to escape them like &amp;lt;. This is required so the
 * XML parser does not act on them, but instead passes the correct
 * character to the regexp engine.
 * </p>
 *
 * @since 4.0
 */
@FileStatefulCheck
public class RegexpCheck extends AbstractCheck {

    /**
     * A key is pointing to the warning message text in "messages.properties"
     * file.
     */
    public static final String MSG_ILLEGAL_REGEXP = "illegal.regexp";

    /**
     * A key is pointing to the warning message text in "messages.properties"
     * file.
     */
    public static final String MSG_REQUIRED_REGEXP = "required.regexp";

    /**
     * A key is pointing to the warning message text in "messages.properties"
     * file.
     */
    public static final String MSG_DUPLICATE_REGEXP = "duplicate.regexp";

    /** Default duplicate limit. */
    private static final int DEFAULT_DUPLICATE_LIMIT = -1;

    /** Default error report limit. */
    private static final int DEFAULT_ERROR_LIMIT = 100;

    /** Error count exceeded message. */
    private static final String ERROR_LIMIT_EXCEEDED_MESSAGE =
        "The error limit has been exceeded, "
        + "the check is aborting, there may be more unreported errors.";

    /**
     * Specify message which is used to notify about violations,
     * if empty then the default (hard-coded) message is used.
     */
    private String message;

    /** Control whether to ignore matches found within comments. */
    private boolean ignoreComments;

    /** Control whether the pattern is required or illegal. */
    private boolean illegalPattern;

    /** Specify the maximum number of violations before the check will abort. */
    private int errorLimit = DEFAULT_ERROR_LIMIT;

    /**
     * Control whether to check for duplicates of a required pattern,
     * any negative value means no checking for duplicates,
     * any positive value is used as the maximum number of allowed duplicates,
     * if the limit is exceeded violations will be logged.
     */
    private int duplicateLimit;

    /** Boolean to say if we should check for duplicates. */
    private boolean checkForDuplicates;

    /** Specify the pattern to match against. */
    private Pattern format = Pattern.compile("^$", Pattern.MULTILINE);

    /**
     * Setter to specify message which is used to notify about violations,
     * if empty then the default (hard-coded) message is used.
     *
     * @param message custom message which should be used in report.
     * @since 4.0
     */
    public void setMessage(String message) {
        this.message = message;
    }

    /**
     * Setter to control whether to ignore matches found within comments.
     *
     * @param ignoreComments True if comments should be ignored.
     * @since 4.0
     */
    public void setIgnoreComments(boolean ignoreComments) {
        this.ignoreComments = ignoreComments;
    }

    /**
     * Setter to control whether the pattern is required or illegal.
     *
     * @param illegalPattern True if pattern is not allowed.
     * @since 4.0
     */
    public void setIllegalPattern(boolean illegalPattern) {
        this.illegalPattern = illegalPattern;
    }

    /**
     * Setter to specify the maximum number of violations before the check will abort.
     *
     * @param errorLimit the number of errors to report.
     * @since 4.0
     */
    public void setErrorLimit(int errorLimit) {
        this.errorLimit = errorLimit;
    }

    /**
     * Setter to control whether to check for duplicates of a required pattern,
     * any negative value means no checking for duplicates,
     * any positive value is used as the maximum number of allowed duplicates,
     * if the limit is exceeded violations will be logged.
     *
     * @param duplicateLimit negative values mean no duplicate checking,
     *     any positive value is used as the limit.
     * @since 4.0
     */
    public void setDuplicateLimit(int duplicateLimit) {
        // Any value above the -1 sentinel (i.e. zero or more) switches duplicate checking on.
        checkForDuplicates = duplicateLimit > DEFAULT_DUPLICATE_LIMIT;
        this.duplicateLimit = duplicateLimit;
    }

    /**
     * Setter to specify the pattern to match against.
     *
     * @param pattern the new pattern
     * @since 4.0
     */
    public final void setFormat(Pattern pattern) {
        // Rebuild from the pattern source so the multiline flag is passed regardless of
        // the flags the supplied Pattern was compiled with.
        final String regexp = pattern.pattern();
        format = CommonUtil.createPattern(regexp, Pattern.MULTILINE);
    }

    @Override
    public int[] getDefaultTokens() {
        return getRequiredTokens();
    }

    @Override
    public int[] getAcceptableTokens() {
        return getRequiredTokens();
    }

    @Override
    public int[] getRequiredTokens() {
        // No tokens are requested; all work is done on the file text in beginTree.
        return CommonUtil.EMPTY_INT_ARRAY;
    }

    @Override
    public void beginTree(DetailAST rootAST) {
        processRegexpMatches();
    }

    /**
     * Scans the full file text for occurrences of the configured pattern and logs
     * a violation for every illegal or excess duplicate occurrence, stopping once
     * the error limit is reached. When the pattern is required and no counted
     * occurrence was found, a single violation is logged on line 1.
     */
    // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
    @SuppressWarnings("deprecation")
    private void processRegexpMatches() {
        final FileText fileText = getFileContents().getText();
        final Matcher matcher = format.matcher(fileText.getFullText());
        int violations = 0;
        int occurrences = 0;

        while (violations < errorLimit && matcher.find()) {
            final LineColumn matchStart = fileText.lineColumn(matcher.start());
            final int matchLine = matchStart.getLine();

            if (isIgnore(matchLine, fileText, matchStart, matcher)) {
                // Ignored matches are neither counted nor reported.
                continue;
            }

            occurrences++;
            // The first occurrence is not a duplicate, hence the "- 1".
            final boolean overDuplicateLimit =
                checkForDuplicates && occurrences - 1 > duplicateLimit;
            if (illegalPattern || overDuplicateLimit) {
                violations++;
                logMessage(matchLine, violations);
            }
        }

        if (occurrences == 0 && !illegalPattern) {
            log(1, MSG_REQUIRED_REGEXP, getMessage(violations));
        }
    }

    /**
     * Decides whether the current match has to be skipped because comments are
     * ignored and the matched region intersects a comment.
     *
     * @param startLine line on which the match starts
     * @param text file text used to translate offsets into line/column positions
     * @param start line and column of the first matched character
     * @param matcher matcher positioned on the current match
     * @return true if the match must be ignored
     */
    // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
    @SuppressWarnings("deprecation")
    private boolean isIgnore(int startLine, FileText text, LineColumn start, Matcher matcher) {
        // Offset of the last matched character; clamped to 0 for an empty match at
        // the very beginning of the file, where "end - 1" would be negative.
        final int lastMatchedOffset = Math.max(matcher.end() - 1, 0);
        final LineColumn end = text.lineColumn(lastMatchedOffset);

        return ignoreComments
            && getFileContents().hasIntersectionWithComment(
                startLine, start.getColumn(), end.getLine(), end.getColumn());
    }

    /**
     * Logs a violation for a match, using the illegal-pattern key when the pattern
     * is forbidden and the duplicate key otherwise.
     *
     * @param lineNumber the line number the message relates to.
     * @param errorCount number of errors in the file.
     */
    private void logMessage(int lineNumber, int errorCount) {
        final String text = getMessage(errorCount);
        final String key;
        if (illegalPattern) {
            key = MSG_ILLEGAL_REGEXP;
        }
        else {
            key = MSG_DUPLICATE_REGEXP;
        }
        log(lineNumber, key, text);
    }

    /**
     * Builds the message argument for a violation: the custom message when one is
     * configured, otherwise the pattern itself, prefixed with the limit-exceeded
     * notice once the error count has reached the error limit.
     *
     * @param errorCount number of errors in the file.
     * @return message for violation.
     */
    private String getMessage(int errorCount) {
        final boolean noCustomMessage = message == null || message.isEmpty();
        final String base;
        if (noCustomMessage) {
            base = format.pattern();
        }
        else {
            base = message;
        }

        final String result;
        if (errorCount < errorLimit) {
            result = base;
        }
        else {
            result = ERROR_LIMIT_EXCEEDED_MESSAGE + base;
        }
        return result;
    }

}