1 /////////////////////////////////////////////////////////////////////////////////////////////// 2 // checkstyle: Checks Java source code and other text files for adherence to a set of rules. 3 // Copyright (C) 2001-2024 the original author or authors. 4 // 5 // This library is free software; you can redistribute it and/or 6 // modify it under the terms of the GNU Lesser General Public 7 // License as published by the Free Software Foundation; either 8 // version 2.1 of the License, or (at your option) any later version. 9 // 10 // This library is distributed in the hope that it will be useful, 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 // Lesser General Public License for more details. 14 // 15 // You should have received a copy of the GNU Lesser General Public 16 // License along with this library; if not, write to the Free Software 17 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 /////////////////////////////////////////////////////////////////////////////////////////////// 19 20 package com.puppycrawl.tools.checkstyle.checks.regexp; 21 22 import java.util.regex.Matcher; 23 import java.util.regex.Pattern; 24 25 import com.puppycrawl.tools.checkstyle.FileStatefulCheck; 26 import com.puppycrawl.tools.checkstyle.api.AbstractCheck; 27 import com.puppycrawl.tools.checkstyle.api.DetailAST; 28 import com.puppycrawl.tools.checkstyle.api.FileContents; 29 import com.puppycrawl.tools.checkstyle.api.FileText; 30 import com.puppycrawl.tools.checkstyle.api.LineColumn; 31 import com.puppycrawl.tools.checkstyle.utils.CommonUtil; 32 33 /** 34 * <p> 35 * Checks that a specified pattern exists, exists less than 36 * a set number of times, or does not exist in the file. 37 * </p> 38 * <p> 39 * This check combines all the functionality provided by 40 * <a href="https://checkstyle.org/checks/header/regexpheader.html#RegexpHeader">RegexpHeader</a> 41 * except supplying the regular expression from a file. 42 * </p> 43 * <p> 44 * It differs from them in that it works in multiline mode. Its regular expression 45 * can span multiple lines and it checks this against the whole file at once. 46 * The others work in single-line mode. Their single or multiple regular expressions 47 * can only span one line. They check each of these against each line in the file in turn. 48 * </p> 49 * <p> 50 * <b>Note:</b> Because of the different mode of operation there may be some 51 * changes in the regular expressions used to achieve a particular end. 52 * </p> 53 * <p> 54 * In multiline mode... 55 * </p> 56 * <ul> 57 * <li> 58 * {@code ^} means the beginning of a line, as opposed to beginning of the input. 59 * </li> 60 * <li> 61 * For beginning of the input use {@code \A}. 62 * </li> 63 * <li> 64 * {@code $} means the end of a line, as opposed to the end of the input. 65 * </li> 66 * <li> 67 * For end of input use {@code \Z}. 68 * </li> 69 * <li> 70 * Each line in the file is terminated with a line feed character. 71 * </li> 72 * </ul> 73 * <p> 74 * <b>Note:</b> Not all regular expression engines are created equal. 75 * Some provide extra functions that others do not and some elements 76 * of the syntax may vary. This check makes use of the 77 * <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/package-summary.html"> 78 * java.util.regex package</a>; please check its documentation for details 79 * of how to construct a regular expression to achieve a particular goal. 80 * </p> 81 * <p> 82 * <b>Note:</b> When entering a regular expression as a parameter in 83 * the XML config file you must also take into account the XML rules. e.g. 84 * if you want to match a < symbol you need to enter &lt;. 85 * The regular expression should be entered on one line. 86 * </p> 87 * <ul> 88 * <li> 89 * Property {@code duplicateLimit} - Control whether to check for duplicates 90 * of a required pattern, any negative value means no checking for duplicates, 91 * any positive value is used as the maximum number of allowed duplicates, 92 * if the limit is exceeded violations will be logged. 93 * Type is {@code int}. 94 * Default value is {@code 0}. 95 * </li> 96 * <li> 97 * Property {@code errorLimit} - Specify the maximum number of violations before 98 * the check will abort. 99 * Type is {@code int}. 100 * Default value is {@code 100}. 101 * </li> 102 * <li> 103 * Property {@code format} - Specify the pattern to match against. 104 * Type is {@code java.util.regex.Pattern}. 105 * Default value is {@code "^$"}. 106 * </li> 107 * <li> 108 * Property {@code ignoreComments} - Control whether to ignore matches found within comments. 109 * Type is {@code boolean}. 110 * Default value is {@code false}. 111 * </li> 112 * <li> 113 * Property {@code illegalPattern} - Control whether the pattern is required or illegal. 114 * Type is {@code boolean}. 115 * Default value is {@code false}. 116 * </li> 117 * <li> 118 * Property {@code message} - Specify message which is used to notify about 119 * violations, if empty then the default (hard-coded) message is used. 120 * Type is {@code java.lang.String}. 121 * Default value is {@code null}. 122 * </li> 123 * </ul> 124 * <p> 125 * Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker} 126 * </p> 127 * <p> 128 * Violation Message Keys: 129 * </p> 130 * <ul> 131 * <li> 132 * {@code duplicate.regexp} 133 * </li> 134 * <li> 135 * {@code illegal.regexp} 136 * </li> 137 * <li> 138 * {@code required.regexp} 139 * </li> 140 * </ul> 141 * 142 * @since 4.0 143 */ 144 @FileStatefulCheck 145 public class RegexpCheck extends AbstractCheck { 146 147 /** 148 * A key is pointing to the warning message text in "messages.properties" 149 * file. 150 */ 151 public static final String MSG_ILLEGAL_REGEXP = "illegal.regexp"; 152 153 /** 154 * A key is pointing to the warning message text in "messages.properties" 155 * file. 156 */ 157 public static final String MSG_REQUIRED_REGEXP = "required.regexp"; 158 159 /** 160 * A key is pointing to the warning message text in "messages.properties" 161 * file. 162 */ 163 public static final String MSG_DUPLICATE_REGEXP = "duplicate.regexp"; 164 165 /** Default duplicate limit. */ 166 private static final int DEFAULT_DUPLICATE_LIMIT = -1; 167 168 /** Default error report limit. */ 169 private static final int DEFAULT_ERROR_LIMIT = 100; 170 171 /** Error count exceeded message. */ 172 private static final String ERROR_LIMIT_EXCEEDED_MESSAGE = 173 "The error limit has been exceeded, " 174 + "the check is aborting, there may be more unreported errors."; 175 176 /** 177 * Specify message which is used to notify about violations, 178 * if empty then the default (hard-coded) message is used. 179 */ 180 private String message; 181 182 /** Control whether to ignore matches found within comments. */ 183 private boolean ignoreComments; 184 185 /** Control whether the pattern is required or illegal. */ 186 private boolean illegalPattern; 187 188 /** Specify the maximum number of violations before the check will abort. */ 189 private int errorLimit = DEFAULT_ERROR_LIMIT; 190 191 /** 192 * Control whether to check for duplicates of a required pattern, 193 * any negative value means no checking for duplicates, 194 * any positive value is used as the maximum number of allowed duplicates, 195 * if the limit is exceeded violations will be logged. 196 */ 197 private int duplicateLimit; 198 199 /** Boolean to say if we should check for duplicates. */ 200 private boolean checkForDuplicates; 201 202 /** Tracks number of matches made. */ 203 private int matchCount; 204 205 /** Tracks number of errors. */ 206 private int errorCount; 207 208 /** Specify the pattern to match against. */ 209 private Pattern format = Pattern.compile("^$", Pattern.MULTILINE); 210 211 /** The matcher. */ 212 private Matcher matcher; 213 214 /** 215 * Setter to specify message which is used to notify about violations, 216 * if empty then the default (hard-coded) message is used. 217 * 218 * @param message custom message which should be used in report. 219 * @since 4.0 220 */ 221 public void setMessage(String message) { 222 this.message = message; 223 } 224 225 /** 226 * Setter to control whether to ignore matches found within comments. 227 * 228 * @param ignoreComments True if comments should be ignored. 229 * @since 4.0 230 */ 231 public void setIgnoreComments(boolean ignoreComments) { 232 this.ignoreComments = ignoreComments; 233 } 234 235 /** 236 * Setter to control whether the pattern is required or illegal. 237 * 238 * @param illegalPattern True if pattern is not allowed. 239 * @since 4.0 240 */ 241 public void setIllegalPattern(boolean illegalPattern) { 242 this.illegalPattern = illegalPattern; 243 } 244 245 /** 246 * Setter to specify the maximum number of violations before the check will abort. 247 * 248 * @param errorLimit the number of errors to report. 249 * @since 4.0 250 */ 251 public void setErrorLimit(int errorLimit) { 252 this.errorLimit = errorLimit; 253 } 254 255 /** 256 * Setter to control whether to check for duplicates of a required pattern, 257 * any negative value means no checking for duplicates, 258 * any positive value is used as the maximum number of allowed duplicates, 259 * if the limit is exceeded violations will be logged. 260 * 261 * @param duplicateLimit negative values mean no duplicate checking, 262 * any positive value is used as the limit. 263 * @since 4.0 264 */ 265 public void setDuplicateLimit(int duplicateLimit) { 266 this.duplicateLimit = duplicateLimit; 267 checkForDuplicates = duplicateLimit > DEFAULT_DUPLICATE_LIMIT; 268 } 269 270 /** 271 * Setter to specify the pattern to match against. 272 * 273 * @param pattern the new pattern 274 * @since 4.0 275 */ 276 public final void setFormat(Pattern pattern) { 277 format = CommonUtil.createPattern(pattern.pattern(), Pattern.MULTILINE); 278 } 279 280 @Override 281 public int[] getDefaultTokens() { 282 return getRequiredTokens(); 283 } 284 285 @Override 286 public int[] getAcceptableTokens() { 287 return getRequiredTokens(); 288 } 289 290 @Override 291 public int[] getRequiredTokens() { 292 return CommonUtil.EMPTY_INT_ARRAY; 293 } 294 295 // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166 296 @SuppressWarnings("deprecation") 297 @Override 298 public void beginTree(DetailAST rootAST) { 299 matcher = format.matcher(getFileContents().getText().getFullText()); 300 matchCount = 0; 301 errorCount = 0; 302 findMatch(); 303 } 304 305 /** 306 * Recursive method that finds the matches. 307 * 308 * @noinspection TailRecursion 309 * @noinspectionreason TailRecursion - until issue #14814 310 */ 311 // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166 312 @SuppressWarnings("deprecation") 313 private void findMatch() { 314 final boolean foundMatch = matcher.find(); 315 if (foundMatch) { 316 final FileText text = getFileContents().getText(); 317 final LineColumn start = text.lineColumn(matcher.start()); 318 final int startLine = start.getLine(); 319 320 final boolean ignore = isIgnore(startLine, text, start); 321 322 if (!ignore) { 323 matchCount++; 324 if (illegalPattern || checkForDuplicates 325 && matchCount - 1 > duplicateLimit) { 326 errorCount++; 327 logMessage(startLine); 328 } 329 } 330 if (canContinueValidation(ignore)) { 331 findMatch(); 332 } 333 } 334 else if (!illegalPattern && matchCount == 0) { 335 final String msg = getMessage(); 336 log(1, MSG_REQUIRED_REGEXP, msg); 337 } 338 } 339 340 /** 341 * Check if we can stop validation. 342 * 343 * @param ignore flag 344 * @return true is we can continue 345 */ 346 private boolean canContinueValidation(boolean ignore) { 347 return errorCount <= errorLimit - 1 348 && (ignore || illegalPattern || checkForDuplicates); 349 } 350 351 /** 352 * Detect ignore situation. 353 * 354 * @param startLine position of line 355 * @param text file text 356 * @param start line column 357 * @return true is that need to be ignored 358 */ 359 // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166 360 @SuppressWarnings("deprecation") 361 private boolean isIgnore(int startLine, FileText text, LineColumn start) { 362 final LineColumn end; 363 if (matcher.end() == 0) { 364 end = text.lineColumn(0); 365 } 366 else { 367 end = text.lineColumn(matcher.end() - 1); 368 } 369 boolean ignore = false; 370 if (ignoreComments) { 371 final FileContents theFileContents = getFileContents(); 372 final int startColumn = start.getColumn(); 373 final int endLine = end.getLine(); 374 final int endColumn = end.getColumn(); 375 ignore = theFileContents.hasIntersectionWithComment(startLine, 376 startColumn, endLine, endColumn); 377 } 378 return ignore; 379 } 380 381 /** 382 * Displays the right message. 383 * 384 * @param lineNumber the line number the message relates to. 385 */ 386 private void logMessage(int lineNumber) { 387 final String msg = getMessage(); 388 389 if (illegalPattern) { 390 log(lineNumber, MSG_ILLEGAL_REGEXP, msg); 391 } 392 else { 393 log(lineNumber, MSG_DUPLICATE_REGEXP, msg); 394 } 395 } 396 397 /** 398 * Provide right message. 399 * 400 * @return message for violation. 401 */ 402 private String getMessage() { 403 String msg; 404 405 if (message == null || message.isEmpty()) { 406 msg = format.pattern(); 407 } 408 else { 409 msg = message; 410 } 411 412 if (errorCount >= errorLimit) { 413 msg = ERROR_LIMIT_EXCEEDED_MESSAGE + msg; 414 } 415 416 return msg; 417 } 418 419 }