1 ///////////////////////////////////////////////////////////////////////////////////////////////
2 // checkstyle: Checks Java source code and other text files for adherence to a set of rules.
3 // Copyright (C) 2001-2025 the original author or authors.
4 //
5 // This library is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU Lesser General Public
7 // License as published by the Free Software Foundation; either
8 // version 2.1 of the License, or (at your option) any later version.
9 //
10 // This library is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public
16 // License along with this library; if not, write to the Free Software
17 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 ///////////////////////////////////////////////////////////////////////////////////////////////
19
20 package com.puppycrawl.tools.checkstyle.checks.regexp;
21
22 import java.util.regex.Matcher;
23 import java.util.regex.Pattern;
24
25 import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
26 import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
27 import com.puppycrawl.tools.checkstyle.api.DetailAST;
28 import com.puppycrawl.tools.checkstyle.api.FileContents;
29 import com.puppycrawl.tools.checkstyle.api.FileText;
30 import com.puppycrawl.tools.checkstyle.api.LineColumn;
31 import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
32
33 /**
34 * <div>
35 * Checks that a specified pattern exists, exists less than
36 * a set number of times, or does not exist in the file.
37 * </div>
38 *
39 * <p>
40 * This check combines all the functionality provided by
41 * <a href="https://checkstyle.org/checks/header/regexpheader.html">RegexpHeader</a>
42 * except supplying the regular expression from a file.
43 * </p>
44 *
45 * <p>
46 * It differs from them in that it works in multiline mode. Its regular expression
47 * can span multiple lines and it checks this against the whole file at once.
48 * The others work in single-line mode. Their single or multiple regular expressions
49 * can only span one line. They check each of these against each line in the file in turn.
50 * </p>
51 *
52 * <p>
53 * <b>Note:</b> Because of the different mode of operation there may be some
54 * changes in the regular expressions used to achieve a particular end.
55 * </p>
56 *
57 * <p>
58 * In multiline mode...
59 * </p>
60 * <ul>
61 * <li>
62 * {@code ^} means the beginning of a line, as opposed to beginning of the input.
63 * </li>
64 * <li>
65 * For beginning of the input use {@code \A}.
66 * </li>
67 * <li>
68 * {@code $} means the end of a line, as opposed to the end of the input.
69 * </li>
70 * <li>
71 * For end of input use {@code \Z}.
72 * </li>
73 * <li>
74 * Each line in the file is terminated with a line feed character.
75 * </li>
76 * </ul>
77 *
78 * <p>
79 * <b>Note:</b> Not all regular expression engines are created equal.
80 * Some provide extra functions that others do not and some elements
81 * of the syntax may vary. This check makes use of the
82 * <a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/util/regex/package-summary.html">
83 * java.util.regex package</a>; please check its documentation for details
84 * of how to construct a regular expression to achieve a particular goal.
85 * </p>
86 *
87 * <p>
88 * <b>Note:</b> When entering a regular expression as a parameter in
89 * the XML config file you must also take into account the XML rules. e.g.
 * if you want to match a &lt; symbol you need to enter &amp;lt;.
91 * The regular expression should be entered on one line.
92 * </p>
93 *
94 * <p>
95 * <b>Note:</b> To search for parentheses () in a regular expression
96 * you must escape them like \(\). This is required by the regexp engine,
97 * otherwise it will think they are special instruction characters.
98 * </p>
99 *
100 * <p>
101 * <b>Note:</b> To search for things that mean something in XML, like
 * &lt; you need to escape them like &amp;lt;. This is required so the
103 * XML parser does not act on them, but instead passes the correct
104 * character to the regexp engine.
105 * </p>
106 *
107 * @since 4.0
108 */
109 @FileStatefulCheck
110 public class RegexpCheck extends AbstractCheck {
111
112 /**
113 * A key is pointing to the warning message text in "messages.properties"
114 * file.
115 */
116 public static final String MSG_ILLEGAL_REGEXP = "illegal.regexp";
117
118 /**
119 * A key is pointing to the warning message text in "messages.properties"
120 * file.
121 */
122 public static final String MSG_REQUIRED_REGEXP = "required.regexp";
123
124 /**
125 * A key is pointing to the warning message text in "messages.properties"
126 * file.
127 */
128 public static final String MSG_DUPLICATE_REGEXP = "duplicate.regexp";
129
130 /** Default duplicate limit. */
131 private static final int DEFAULT_DUPLICATE_LIMIT = -1;
132
133 /** Default error report limit. */
134 private static final int DEFAULT_ERROR_LIMIT = 100;
135
136 /** Error count exceeded message. */
137 private static final String ERROR_LIMIT_EXCEEDED_MESSAGE =
138 "The error limit has been exceeded, "
139 + "the check is aborting, there may be more unreported errors.";
140
141 /**
142 * Specify message which is used to notify about violations,
143 * if empty then the default (hard-coded) message is used.
144 */
145 private String message;
146
147 /** Control whether to ignore matches found within comments. */
148 private boolean ignoreComments;
149
150 /** Control whether the pattern is required or illegal. */
151 private boolean illegalPattern;
152
153 /** Specify the maximum number of violations before the check will abort. */
154 private int errorLimit = DEFAULT_ERROR_LIMIT;
155
156 /**
157 * Control whether to check for duplicates of a required pattern,
158 * any negative value means no checking for duplicates,
159 * any positive value is used as the maximum number of allowed duplicates,
160 * if the limit is exceeded violations will be logged.
161 */
162 private int duplicateLimit;
163
164 /** Boolean to say if we should check for duplicates. */
165 private boolean checkForDuplicates;
166
167 /** Specify the pattern to match against. */
168 private Pattern format = Pattern.compile("^$", Pattern.MULTILINE);
169
170 /**
171 * Setter to specify message which is used to notify about violations,
172 * if empty then the default (hard-coded) message is used.
173 *
174 * @param message custom message which should be used in report.
175 * @since 4.0
176 */
177 public void setMessage(String message) {
178 this.message = message;
179 }
180
181 /**
182 * Setter to control whether to ignore matches found within comments.
183 *
184 * @param ignoreComments True if comments should be ignored.
185 * @since 4.0
186 */
187 public void setIgnoreComments(boolean ignoreComments) {
188 this.ignoreComments = ignoreComments;
189 }
190
191 /**
192 * Setter to control whether the pattern is required or illegal.
193 *
194 * @param illegalPattern True if pattern is not allowed.
195 * @since 4.0
196 */
197 public void setIllegalPattern(boolean illegalPattern) {
198 this.illegalPattern = illegalPattern;
199 }
200
201 /**
202 * Setter to specify the maximum number of violations before the check will abort.
203 *
204 * @param errorLimit the number of errors to report.
205 * @since 4.0
206 */
207 public void setErrorLimit(int errorLimit) {
208 this.errorLimit = errorLimit;
209 }
210
211 /**
212 * Setter to control whether to check for duplicates of a required pattern,
213 * any negative value means no checking for duplicates,
214 * any positive value is used as the maximum number of allowed duplicates,
215 * if the limit is exceeded violations will be logged.
216 *
217 * @param duplicateLimit negative values mean no duplicate checking,
218 * any positive value is used as the limit.
219 * @since 4.0
220 */
221 public void setDuplicateLimit(int duplicateLimit) {
222 this.duplicateLimit = duplicateLimit;
223 checkForDuplicates = duplicateLimit > DEFAULT_DUPLICATE_LIMIT;
224 }
225
226 /**
227 * Setter to specify the pattern to match against.
228 *
229 * @param pattern the new pattern
230 * @since 4.0
231 */
232 public final void setFormat(Pattern pattern) {
233 format = CommonUtil.createPattern(pattern.pattern(), Pattern.MULTILINE);
234 }
235
236 @Override
237 public int[] getDefaultTokens() {
238 return getRequiredTokens();
239 }
240
241 @Override
242 public int[] getAcceptableTokens() {
243 return getRequiredTokens();
244 }
245
246 @Override
247 public int[] getRequiredTokens() {
248 return CommonUtil.EMPTY_INT_ARRAY;
249 }
250
251 @Override
252 public void beginTree(DetailAST rootAST) {
253 processRegexpMatches();
254 }
255
256 /**
257 * Processes the regexp matches and logs the number of errors in the file.
258 *
259 */
260 // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
261 @SuppressWarnings("deprecation")
262 private void processRegexpMatches() {
263 final Matcher matcher = format.matcher(getFileContents().getText().getFullText());
264 int errorCount = 0;
265 int matchCount = 0;
266 final FileText text = getFileContents().getText();
267 while (errorCount < errorLimit && matcher.find()) {
268 final LineColumn start = text.lineColumn(matcher.start());
269 final int startLine = start.getLine();
270
271 final boolean ignore = isIgnore(startLine, text, start, matcher);
272 if (!ignore) {
273 matchCount++;
274 if (illegalPattern || checkForDuplicates
275 && matchCount - 1 > duplicateLimit) {
276 errorCount++;
277 logMessage(startLine, errorCount);
278 }
279 }
280 }
281 if (!illegalPattern && matchCount == 0) {
282 final String msg = getMessage(errorCount);
283 log(1, MSG_REQUIRED_REGEXP, msg);
284 }
285 }
286
287 /**
288 * Detect ignore situation.
289 *
290 * @param startLine position of line
291 * @param text file text
292 * @param start line column
293 * @param matcher The matcher
294 * @return true is that need to be ignored
295 */
296 // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
297 @SuppressWarnings("deprecation")
298 private boolean isIgnore(int startLine, FileText text, LineColumn start, Matcher matcher) {
299 final LineColumn end;
300 if (matcher.end() == 0) {
301 end = text.lineColumn(0);
302 }
303 else {
304 end = text.lineColumn(matcher.end() - 1);
305 }
306 boolean ignore = false;
307 if (ignoreComments) {
308 final FileContents theFileContents = getFileContents();
309 final int startColumn = start.getColumn();
310 final int endLine = end.getLine();
311 final int endColumn = end.getColumn();
312 ignore = theFileContents.hasIntersectionWithComment(startLine,
313 startColumn, endLine, endColumn);
314 }
315 return ignore;
316 }
317
318 /**
319 * Displays the right message.
320 *
321 * @param lineNumber the line number the message relates to.
322 * @param errorCount number of errors in the file.
323 */
324 private void logMessage(int lineNumber, int errorCount) {
325 final String msg = getMessage(errorCount);
326
327 if (illegalPattern) {
328 log(lineNumber, MSG_ILLEGAL_REGEXP, msg);
329 }
330 else {
331 log(lineNumber, MSG_DUPLICATE_REGEXP, msg);
332 }
333 }
334
335 /**
336 * Provide right message.
337 *
338 * @param errorCount number of errors in the file.
339 * @return message for violation.
340 */
341 private String getMessage(int errorCount) {
342 String msg;
343
344 if (message == null || message.isEmpty()) {
345 msg = format.pattern();
346 }
347 else {
348 msg = message;
349 }
350
351 if (errorCount >= errorLimit) {
352 msg = ERROR_LIMIT_EXCEEDED_MESSAGE + msg;
353 }
354
355 return msg;
356 }
357 }