///////////////////////////////////////////////////////////////////////////////////////////////
// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
// Copyright (C) 2001-2025 the original author or authors.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
///////////////////////////////////////////////////////////////////////////////////////////////

package com.puppycrawl.tools.checkstyle.checks.regexp;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
import com.puppycrawl.tools.checkstyle.api.DetailAST;
import com.puppycrawl.tools.checkstyle.api.FileContents;
import com.puppycrawl.tools.checkstyle.api.FileText;
import com.puppycrawl.tools.checkstyle.api.LineColumn;
import com.puppycrawl.tools.checkstyle.utils.CommonUtil;

/**
 * <div>
 * Checks that a specified pattern exists, exists less than
 * a set number of times, or does not exist in the file.
 * </div>
 *
 * <p>
 * This check combines all the functionality provided by
 * <a href="https://checkstyle.org/checks/header/regexpheader.html#RegexpHeader">RegexpHeader</a>
 * except supplying the regular expression from a file.
 * </p>
 *
 * <p>
 * It differs from them in that it works in multiline mode. Its regular expression
 * can span multiple lines and it checks this against the whole file at once.
 * The others work in single-line mode. Their single or multiple regular expressions
 * can only span one line. They check each of these against each line in the file in turn.
 * </p>
 *
 * <p>
 * <b>Note:</b> Because of the different mode of operation there may be some
 * changes in the regular expressions used to achieve a particular end.
 * </p>
 *
 * <p>
 * In multiline mode...
 * </p>
 * <ul>
 * <li>
 * {@code ^} means the beginning of a line, as opposed to beginning of the input.
 * </li>
 * <li>
 * For beginning of the input use {@code \A}.
 * </li>
 * <li>
 * {@code $} means the end of a line, as opposed to the end of the input.
 * </li>
 * <li>
 * For end of input use {@code \Z}.
 * </li>
 * <li>
 * Each line in the file is terminated with a line feed character.
 * </li>
 * </ul>
 *
 * <p>
 * <b>Note:</b> Not all regular expression engines are created equal.
 * Some provide extra functions that others do not and some elements
 * of the syntax may vary. This check makes use of the
 * <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/package-summary.html">
 * java.util.regex package</a>; please check its documentation for details
 * of how to construct a regular expression to achieve a particular goal.
 * </p>
 *
 * <p>
 * <b>Note:</b> When entering a regular expression as a parameter in
 * the XML config file you must also take into account the XML rules. e.g.
 * if you want to match a &lt; symbol you need to enter &amp;lt;.
 * The regular expression should be entered on one line.
 * </p>
 *
 * <p>
 * <b>Note:</b> To search for parentheses () in a regular expression
 * you must escape them like \(\). This is required by the regexp engine,
 * otherwise it will think they are special instruction characters.
 * </p>
 *
 * <p>
 * <b>Note:</b> To search for things that mean something in XML, like
 * &lt; you need to escape them like &amp;lt;. This is required so the
 * XML parser does not act on them, but instead passes the correct
 * character to the regexp engine.
 * </p>
 * <ul>
 * <li>
 * Property {@code duplicateLimit} - Control whether to check for duplicates
 * of a required pattern, any negative value means no checking for duplicates,
 * any positive value is used as the maximum number of allowed duplicates,
 * if the limit is exceeded violations will be logged.
 * Type is {@code int}.
 * Default value is {@code 0}.
 * </li>
 * <li>
 * Property {@code errorLimit} - Specify the maximum number of violations before
 * the check will abort.
 * Type is {@code int}.
 * Default value is {@code 100}.
 * </li>
 * <li>
 * Property {@code format} - Specify the pattern to match against.
 * Type is {@code java.util.regex.Pattern}.
 * Default value is {@code "^$"}.
 * </li>
 * <li>
 * Property {@code ignoreComments} - Control whether to ignore matches found within comments.
 * Type is {@code boolean}.
 * Default value is {@code false}.
 * </li>
 * <li>
 * Property {@code illegalPattern} - Control whether the pattern is required or illegal.
 * Type is {@code boolean}.
 * Default value is {@code false}.
 * </li>
 * <li>
 * Property {@code message} - Specify message which is used to notify about
 * violations, if empty then the default (hard-coded) message is used.
 * Type is {@code java.lang.String}.
 * Default value is {@code null}.
 * </li>
 * </ul>
 *
 * <p>
 * Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker}
 * </p>
 *
 * <p>
 * Violation Message Keys:
 * </p>
 * <ul>
 * <li>
 * {@code duplicate.regexp}
 * </li>
 * <li>
 * {@code illegal.regexp}
 * </li>
 * <li>
 * {@code required.regexp}
 * </li>
 * </ul>
 *
 * @since 4.0
 */
@FileStatefulCheck
public class RegexpCheck extends AbstractCheck {

    /**
     * A key is pointing to the warning message text in "messages.properties"
     * file.
     */
    public static final String MSG_ILLEGAL_REGEXP = "illegal.regexp";

    /**
     * A key is pointing to the warning message text in "messages.properties"
     * file.
     */
    public static final String MSG_REQUIRED_REGEXP = "required.regexp";

    /**
     * A key is pointing to the warning message text in "messages.properties"
     * file.
     */
    public static final String MSG_DUPLICATE_REGEXP = "duplicate.regexp";

    /**
     * Threshold used by {@link #setDuplicateLimit(int)}: duplicate checking is
     * enabled only for limits strictly greater than this value.
     */
    private static final int DEFAULT_DUPLICATE_LIMIT = -1;

    /** Default error report limit. */
    private static final int DEFAULT_ERROR_LIMIT = 100;

    /** Error count exceeded message. */
    private static final String ERROR_LIMIT_EXCEEDED_MESSAGE =
        "The error limit has been exceeded, "
        + "the check is aborting, there may be more unreported errors.";

    /**
     * Specify message which is used to notify about violations,
     * if empty then the default (hard-coded) message is used.
     */
    private String message;

    /** Control whether to ignore matches found within comments. */
    private boolean ignoreComments;

    /** Control whether the pattern is required or illegal. */
    private boolean illegalPattern;

    /** Specify the maximum number of violations before the check will abort. */
    private int errorLimit = DEFAULT_ERROR_LIMIT;

    /**
     * Control whether to check for duplicates of a required pattern,
     * any negative value means no checking for duplicates,
     * any positive value is used as the maximum number of allowed duplicates,
     * if the limit is exceeded violations will be logged.
     */
    private int duplicateLimit;

    /**
     * Boolean to say if we should check for duplicates. It is only assigned by
     * {@link #setDuplicateLimit(int)}, so it stays {@code false} (no duplicate
     * checking) until that setter is called with a non-negative value.
     */
    private boolean checkForDuplicates;

    /** Specify the pattern to match against. */
    private Pattern format = Pattern.compile("^$", Pattern.MULTILINE);

    /**
     * Setter to specify message which is used to notify about violations,
     * if empty then the default (hard-coded) message is used.
     *
     * @param message custom message which should be used in report.
     * @since 4.0
     */
    public void setMessage(String message) {
        this.message = message;
    }

    /**
     * Setter to control whether to ignore matches found within comments.
     *
     * @param ignoreComments True if comments should be ignored.
     * @since 4.0
     */
    public void setIgnoreComments(boolean ignoreComments) {
        this.ignoreComments = ignoreComments;
    }

    /**
     * Setter to control whether the pattern is required or illegal.
     *
     * @param illegalPattern True if pattern is not allowed.
     * @since 4.0
     */
    public void setIllegalPattern(boolean illegalPattern) {
        this.illegalPattern = illegalPattern;
    }

    /**
     * Setter to specify the maximum number of violations before the check will abort.
     *
     * @param errorLimit the number of errors to report.
     * @since 4.0
     */
    public void setErrorLimit(int errorLimit) {
        this.errorLimit = errorLimit;
    }

    /**
     * Setter to control whether to check for duplicates of a required pattern,
     * any negative value means no checking for duplicates,
     * any positive value is used as the maximum number of allowed duplicates,
     * if the limit is exceeded violations will be logged.
     *
     * @param duplicateLimit negative values mean no duplicate checking,
     *     zero or any positive value is used as the limit.
     * @since 4.0
     */
    public void setDuplicateLimit(int duplicateLimit) {
        this.duplicateLimit = duplicateLimit;
        checkForDuplicates = duplicateLimit > DEFAULT_DUPLICATE_LIMIT;
    }

    /**
     * Setter to specify the pattern to match against. The pattern text is
     * recompiled with {@link Pattern#MULTILINE}.
     *
     * @param pattern the new pattern
     * @since 4.0
     */
    public final void setFormat(Pattern pattern) {
        format = CommonUtil.createPattern(pattern.pattern(), Pattern.MULTILINE);
    }

    @Override
    public int[] getDefaultTokens() {
        return getRequiredTokens();
    }

    @Override
    public int[] getAcceptableTokens() {
        return getRequiredTokens();
    }

    @Override
    public int[] getRequiredTokens() {
        return CommonUtil.EMPTY_INT_ARRAY;
    }

    @Override
    public void beginTree(DetailAST rootAST) {
        processRegexpMatches();
    }

    /**
     * Processes the regexp matches and logs the number of errors in the file.
     * Every match that is not ignored is counted; a violation is logged for each
     * counted match when the pattern is illegal, or for each counted match beyond
     * the allowed number of duplicates when duplicate checking is enabled.
     * If the pattern is required and no match was counted, a single violation is
     * logged on line 1.
     */
    // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
    @SuppressWarnings("deprecation")
    private void processRegexpMatches() {
        // Fetch the file text once; it feeds both the matcher and the offset translation.
        final FileText text = getFileContents().getText();
        final Matcher matcher = format.matcher(text.getFullText());
        int errorCount = 0;
        int matchCount = 0;
        while (errorCount < errorLimit && matcher.find()) {
            final LineColumn start = text.lineColumn(matcher.start());
            final int startLine = start.getLine();

            final boolean ignore = isIgnore(startLine, text, start, matcher);
            if (!ignore) {
                matchCount++;
                // The first counted match is not a duplicate, hence "matchCount - 1".
                if (illegalPattern
                        || (checkForDuplicates && matchCount - 1 > duplicateLimit)) {
                    errorCount++;
                    logMessage(startLine, errorCount);
                }
            }
        }
        if (!illegalPattern && matchCount == 0) {
            final String msg = getMessage(errorCount);
            log(1, MSG_REQUIRED_REGEXP, msg);
        }
    }

    /**
     * Detect ignore situation. A match is ignored only when {@code ignoreComments}
     * is enabled and the matched region intersects a comment.
     *
     * @param startLine position of line
     * @param text file text
     * @param start line column
     * @param matcher The matcher
     * @return true if the match should be ignored
     */
    // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
    @SuppressWarnings("deprecation")
    private boolean isIgnore(int startLine, FileText text, LineColumn start, Matcher matcher) {
        boolean ignore = false;
        if (ignoreComments) {
            // The end position is only needed for the comment test, so resolve it here.
            // Use the offset of the last matched character; clamp to 0 for a match
            // that ends at the very beginning of the input.
            final int lastOffset = Math.max(matcher.end() - 1, 0);
            final LineColumn end = text.lineColumn(lastOffset);
            final FileContents theFileContents = getFileContents();
            final int startColumn = start.getColumn();
            final int endLine = end.getLine();
            final int endColumn = end.getColumn();
            ignore = theFileContents.hasIntersectionWithComment(startLine,
                startColumn, endLine, endColumn);
        }
        return ignore;
    }

    /**
     * Displays the right message: the illegal-pattern key when the pattern is
     * illegal, otherwise the duplicate-pattern key.
     *
     * @param lineNumber the line number the message relates to.
     * @param errorCount number of errors in the file.
     */
    private void logMessage(int lineNumber, int errorCount) {
        final String msg = getMessage(errorCount);

        if (illegalPattern) {
            log(lineNumber, MSG_ILLEGAL_REGEXP, msg);
        }
        else {
            log(lineNumber, MSG_DUPLICATE_REGEXP, msg);
        }
    }

    /**
     * Provide right message. Uses the custom message when one is set and not
     * empty, otherwise the pattern text. When {@code errorCount} has reached
     * {@code errorLimit}, the limit-exceeded notice is prepended.
     *
     * @param errorCount number of errors in the file.
     * @return message for violation.
     */
    private String getMessage(int errorCount) {
        String msg;

        if (message == null || message.isEmpty()) {
            msg = format.pattern();
        }
        else {
            msg = message;
        }

        if (errorCount >= errorLimit) {
            // Concatenated directly, without any separator between notice and message.
            msg = ERROR_LIMIT_EXCEEDED_MESSAGE + msg;
        }

        return msg;
    }
}