001/////////////////////////////////////////////////////////////////////////////////////////////// 002// checkstyle: Checks Java source code and other text files for adherence to a set of rules. 003// Copyright (C) 2001-2025 the original author or authors. 004// 005// This library is free software; you can redistribute it and/or 006// modify it under the terms of the GNU Lesser General Public 007// License as published by the Free Software Foundation; either 008// version 2.1 of the License, or (at your option) any later version. 009// 010// This library is distributed in the hope that it will be useful, 011// but WITHOUT ANY WARRANTY; without even the implied warranty of 012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 013// Lesser General Public License for more details. 014// 015// You should have received a copy of the GNU Lesser General Public 016// License along with this library; if not, write to the Free Software 017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 018/////////////////////////////////////////////////////////////////////////////////////////////// 019 020package com.puppycrawl.tools.checkstyle.checks.regexp; 021 022import java.util.regex.Matcher; 023import java.util.regex.Pattern; 024 025import com.puppycrawl.tools.checkstyle.FileStatefulCheck; 026import com.puppycrawl.tools.checkstyle.api.AbstractCheck; 027import com.puppycrawl.tools.checkstyle.api.DetailAST; 028import com.puppycrawl.tools.checkstyle.api.FileContents; 029import com.puppycrawl.tools.checkstyle.api.FileText; 030import com.puppycrawl.tools.checkstyle.api.LineColumn; 031import com.puppycrawl.tools.checkstyle.utils.CommonUtil; 032 033/** 034 * <div> 035 * Checks that a specified pattern exists, exists less than 036 * a set number of times, or does not exist in the file. 037 * </div> 038 * 039 * <p> 040 * This check combines all the functionality provided by 041 * <a href="https://checkstyle.org/checks/header/regexpheader.html#RegexpHeader">RegexpHeader</a> 042 * except supplying the regular expression from a file. 043 * </p> 044 * 045 * <p> 046 * It differs from them in that it works in multiline mode. Its regular expression 047 * can span multiple lines and it checks this against the whole file at once. 048 * The others work in single-line mode. Their single or multiple regular expressions 049 * can only span one line. They check each of these against each line in the file in turn. 050 * </p> 051 * 052 * <p> 053 * <b>Note:</b> Because of the different mode of operation there may be some 054 * changes in the regular expressions used to achieve a particular end. 055 * </p> 056 * 057 * <p> 058 * In multiline mode... 059 * </p> 060 * <ul> 061 * <li> 062 * {@code ^} means the beginning of a line, as opposed to beginning of the input. 063 * </li> 064 * <li> 065 * For beginning of the input use {@code \A}. 066 * </li> 067 * <li> 068 * {@code $} means the end of a line, as opposed to the end of the input. 069 * </li> 070 * <li> 071 * For end of input use {@code \Z}. 072 * </li> 073 * <li> 074 * Each line in the file is terminated with a line feed character. 075 * </li> 076 * </ul> 077 * 078 * <p> 079 * <b>Note:</b> Not all regular expression engines are created equal. 080 * Some provide extra functions that others do not and some elements 081 * of the syntax may vary. This check makes use of the 082 * <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/package-summary.html"> 083 * java.util.regex package</a>; please check its documentation for details 084 * of how to construct a regular expression to achieve a particular goal. 085 * </p> 086 * 087 * <p> 088 * <b>Note:</b> When entering a regular expression as a parameter in 089 * the XML config file you must also take into account the XML rules. e.g. 090 * if you want to match a < symbol you need to enter &lt;. 091 * The regular expression should be entered on one line. 092 * </p> 093 * 094 * <p> 095 * <b>Note:</b> To search for parentheses () in a regular expression 096 * you must escape them like \(\). This is required by the regexp engine, 097 * otherwise it will think they are special instruction characters. 098 * </p> 099 * 100 * <p> 101 * <b>Note:</b> To search for things that mean something in XML, like 102 * < you need to escape them like &lt;. This is required so the 103 * XML parser does not act on them, but instead passes the correct 104 * character to the regexp engine. 105 * </p> 106 * <ul> 107 * <li> 108 * Property {@code duplicateLimit} - Control whether to check for duplicates 109 * of a required pattern, any negative value means no checking for duplicates, 110 * any positive value is used as the maximum number of allowed duplicates, 111 * if the limit is exceeded violations will be logged. 112 * Type is {@code int}. 113 * Default value is {@code 0}. 114 * </li> 115 * <li> 116 * Property {@code errorLimit} - Specify the maximum number of violations before 117 * the check will abort. 118 * Type is {@code int}. 119 * Default value is {@code 100}. 120 * </li> 121 * <li> 122 * Property {@code format} - Specify the pattern to match against. 123 * Type is {@code java.util.regex.Pattern}. 124 * Default value is {@code "^$"}. 125 * </li> 126 * <li> 127 * Property {@code ignoreComments} - Control whether to ignore matches found within comments. 128 * Type is {@code boolean}. 129 * Default value is {@code false}. 130 * </li> 131 * <li> 132 * Property {@code illegalPattern} - Control whether the pattern is required or illegal. 133 * Type is {@code boolean}. 134 * Default value is {@code false}. 135 * </li> 136 * <li> 137 * Property {@code message} - Specify message which is used to notify about 138 * violations, if empty then the default (hard-coded) message is used. 139 * Type is {@code java.lang.String}. 140 * Default value is {@code null}. 141 * </li> 142 * </ul> 143 * 144 * <p> 145 * Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker} 146 * </p> 147 * 148 * <p> 149 * Violation Message Keys: 150 * </p> 151 * <ul> 152 * <li> 153 * {@code duplicate.regexp} 154 * </li> 155 * <li> 156 * {@code illegal.regexp} 157 * </li> 158 * <li> 159 * {@code required.regexp} 160 * </li> 161 * </ul> 162 * 163 * @since 4.0 164 */ 165@FileStatefulCheck 166public class RegexpCheck extends AbstractCheck { 167 168 /** 169 * A key is pointing to the warning message text in "messages.properties" 170 * file. 171 */ 172 public static final String MSG_ILLEGAL_REGEXP = "illegal.regexp"; 173 174 /** 175 * A key is pointing to the warning message text in "messages.properties" 176 * file. 177 */ 178 public static final String MSG_REQUIRED_REGEXP = "required.regexp"; 179 180 /** 181 * A key is pointing to the warning message text in "messages.properties" 182 * file. 183 */ 184 public static final String MSG_DUPLICATE_REGEXP = "duplicate.regexp"; 185 186 /** Default duplicate limit. */ 187 private static final int DEFAULT_DUPLICATE_LIMIT = -1; 188 189 /** Default error report limit. */ 190 private static final int DEFAULT_ERROR_LIMIT = 100; 191 192 /** Error count exceeded message. */ 193 private static final String ERROR_LIMIT_EXCEEDED_MESSAGE = 194 "The error limit has been exceeded, " 195 + "the check is aborting, there may be more unreported errors."; 196 197 /** 198 * Specify message which is used to notify about violations, 199 * if empty then the default (hard-coded) message is used. 200 */ 201 private String message; 202 203 /** Control whether to ignore matches found within comments. */ 204 private boolean ignoreComments; 205 206 /** Control whether the pattern is required or illegal. */ 207 private boolean illegalPattern; 208 209 /** Specify the maximum number of violations before the check will abort. */ 210 private int errorLimit = DEFAULT_ERROR_LIMIT; 211 212 /** 213 * Control whether to check for duplicates of a required pattern, 214 * any negative value means no checking for duplicates, 215 * any positive value is used as the maximum number of allowed duplicates, 216 * if the limit is exceeded violations will be logged. 217 */ 218 private int duplicateLimit; 219 220 /** Boolean to say if we should check for duplicates. */ 221 private boolean checkForDuplicates; 222 223 /** Specify the pattern to match against. */ 224 private Pattern format = Pattern.compile("^$", Pattern.MULTILINE); 225 226 /** 227 * Setter to specify message which is used to notify about violations, 228 * if empty then the default (hard-coded) message is used. 229 * 230 * @param message custom message which should be used in report. 231 * @since 4.0 232 */ 233 public void setMessage(String message) { 234 this.message = message; 235 } 236 237 /** 238 * Setter to control whether to ignore matches found within comments. 239 * 240 * @param ignoreComments True if comments should be ignored. 241 * @since 4.0 242 */ 243 public void setIgnoreComments(boolean ignoreComments) { 244 this.ignoreComments = ignoreComments; 245 } 246 247 /** 248 * Setter to control whether the pattern is required or illegal. 249 * 250 * @param illegalPattern True if pattern is not allowed. 251 * @since 4.0 252 */ 253 public void setIllegalPattern(boolean illegalPattern) { 254 this.illegalPattern = illegalPattern; 255 } 256 257 /** 258 * Setter to specify the maximum number of violations before the check will abort. 259 * 260 * @param errorLimit the number of errors to report. 261 * @since 4.0 262 */ 263 public void setErrorLimit(int errorLimit) { 264 this.errorLimit = errorLimit; 265 } 266 267 /** 268 * Setter to control whether to check for duplicates of a required pattern, 269 * any negative value means no checking for duplicates, 270 * any positive value is used as the maximum number of allowed duplicates, 271 * if the limit is exceeded violations will be logged. 272 * 273 * @param duplicateLimit negative values mean no duplicate checking, 274 * any positive value is used as the limit. 275 * @since 4.0 276 */ 277 public void setDuplicateLimit(int duplicateLimit) { 278 this.duplicateLimit = duplicateLimit; 279 checkForDuplicates = duplicateLimit > DEFAULT_DUPLICATE_LIMIT; 280 } 281 282 /** 283 * Setter to specify the pattern to match against. 284 * 285 * @param pattern the new pattern 286 * @since 4.0 287 */ 288 public final void setFormat(Pattern pattern) { 289 format = CommonUtil.createPattern(pattern.pattern(), Pattern.MULTILINE); 290 } 291 292 @Override 293 public int[] getDefaultTokens() { 294 return getRequiredTokens(); 295 } 296 297 @Override 298 public int[] getAcceptableTokens() { 299 return getRequiredTokens(); 300 } 301 302 @Override 303 public int[] getRequiredTokens() { 304 return CommonUtil.EMPTY_INT_ARRAY; 305 } 306 307 @Override 308 public void beginTree(DetailAST rootAST) { 309 processRegexpMatches(); 310 } 311 312 /** 313 * Processes the regexp matches and logs the number of errors in the file. 314 * 315 */ 316 // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166 317 @SuppressWarnings("deprecation") 318 private void processRegexpMatches() { 319 final Matcher matcher = format.matcher(getFileContents().getText().getFullText()); 320 int errorCount = 0; 321 int matchCount = 0; 322 final FileText text = getFileContents().getText(); 323 while (errorCount < errorLimit && matcher.find()) { 324 final LineColumn start = text.lineColumn(matcher.start()); 325 final int startLine = start.getLine(); 326 327 final boolean ignore = isIgnore(startLine, text, start, matcher); 328 if (!ignore) { 329 matchCount++; 330 if (illegalPattern || checkForDuplicates 331 && matchCount - 1 > duplicateLimit) { 332 errorCount++; 333 logMessage(startLine, errorCount); 334 } 335 } 336 } 337 if (!illegalPattern && matchCount == 0) { 338 final String msg = getMessage(errorCount); 339 log(1, MSG_REQUIRED_REGEXP, msg); 340 } 341 } 342 343 /** 344 * Detect ignore situation. 345 * 346 * @param startLine position of line 347 * @param text file text 348 * @param start line column 349 * @param matcher The matcher 350 * @return true is that need to be ignored 351 */ 352 // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166 353 @SuppressWarnings("deprecation") 354 private boolean isIgnore(int startLine, FileText text, LineColumn start, Matcher matcher) { 355 final LineColumn end; 356 if (matcher.end() == 0) { 357 end = text.lineColumn(0); 358 } 359 else { 360 end = text.lineColumn(matcher.end() - 1); 361 } 362 boolean ignore = false; 363 if (ignoreComments) { 364 final FileContents theFileContents = getFileContents(); 365 final int startColumn = start.getColumn(); 366 final int endLine = end.getLine(); 367 final int endColumn = end.getColumn(); 368 ignore = theFileContents.hasIntersectionWithComment(startLine, 369 startColumn, endLine, endColumn); 370 } 371 return ignore; 372 } 373 374 /** 375 * Displays the right message. 376 * 377 * @param lineNumber the line number the message relates to. 378 * @param errorCount number of errors in the file. 379 */ 380 private void logMessage(int lineNumber, int errorCount) { 381 final String msg = getMessage(errorCount); 382 383 if (illegalPattern) { 384 log(lineNumber, MSG_ILLEGAL_REGEXP, msg); 385 } 386 else { 387 log(lineNumber, MSG_DUPLICATE_REGEXP, msg); 388 } 389 } 390 391 /** 392 * Provide right message. 393 * 394 * @param errorCount number of errors in the file. 395 * @return message for violation. 396 */ 397 private String getMessage(int errorCount) { 398 String msg; 399 400 if (message == null || message.isEmpty()) { 401 msg = format.pattern(); 402 } 403 else { 404 msg = message; 405 } 406 407 if (errorCount >= errorLimit) { 408 msg = ERROR_LIMIT_EXCEEDED_MESSAGE + msg; 409 } 410 411 return msg; 412 } 413}