View Javadoc
1   ///////////////////////////////////////////////////////////////////////////////////////////////
2   // checkstyle: Checks Java source code and other text files for adherence to a set of rules.
3   // Copyright (C) 2001-2025 the original author or authors.
4   //
5   // This library is free software; you can redistribute it and/or
6   // modify it under the terms of the GNU Lesser General Public
7   // License as published by the Free Software Foundation; either
8   // version 2.1 of the License, or (at your option) any later version.
9   //
10  // This library is distributed in the hope that it will be useful,
11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  // Lesser General Public License for more details.
14  //
15  // You should have received a copy of the GNU Lesser General Public
16  // License along with this library; if not, write to the Free Software
17  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  ///////////////////////////////////////////////////////////////////////////////////////////////
19  
20  package com.puppycrawl.tools.checkstyle.checks.regexp;
21  
22  import java.util.regex.Matcher;
23  import java.util.regex.Pattern;
24  
25  import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
26  import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
27  import com.puppycrawl.tools.checkstyle.api.DetailAST;
28  import com.puppycrawl.tools.checkstyle.api.FileContents;
29  import com.puppycrawl.tools.checkstyle.api.FileText;
30  import com.puppycrawl.tools.checkstyle.api.LineColumn;
31  import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
32  
33  /**
34   * <div>
35   * Checks that a specified pattern exists, exists less than
36   * a set number of times, or does not exist in the file.
37   * </div>
38   *
39   * <p>
40   * This check combines all the functionality provided by
41   * <a href="https://checkstyle.org/checks/header/regexpheader.html#RegexpHeader">RegexpHeader</a>
42   * except supplying the regular expression from a file.
43   * </p>
44   *
45   * <p>
46   * It differs from them in that it works in multiline mode. Its regular expression
47   * can span multiple lines and it checks this against the whole file at once.
48   * The others work in single-line mode. Their single or multiple regular expressions
49   * can only span one line. They check each of these against each line in the file in turn.
50   * </p>
51   *
52   * <p>
53   * <b>Note:</b> Because of the different mode of operation there may be some
54   * changes in the regular expressions used to achieve a particular end.
55   * </p>
56   *
57   * <p>
58   * In multiline mode...
59   * </p>
60   * <ul>
61   * <li>
62   * {@code ^} means the beginning of a line, as opposed to beginning of the input.
63   * </li>
64   * <li>
65   * For beginning of the input use {@code \A}.
66   * </li>
67   * <li>
68   * {@code $} means the end of a line, as opposed to the end of the input.
69   * </li>
70   * <li>
71   * For end of input use {@code \Z}.
72   * </li>
73   * <li>
74   * Each line in the file is terminated with a line feed character.
75   * </li>
76   * </ul>
77   *
78   * <p>
79   * <b>Note:</b> Not all regular expression engines are created equal.
80   * Some provide extra functions that others do not and some elements
81   * of the syntax may vary. This check makes use of the
82   * <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/package-summary.html">
83   * java.util.regex package</a>; please check its documentation for details
84   * of how to construct a regular expression to achieve a particular goal.
85   * </p>
86   *
87   * <p>
88   * <b>Note:</b> When entering a regular expression as a parameter in
89   * the XML config file you must also take into account the XML rules. e.g.
90   * if you want to match a &lt; symbol you need to enter &amp;lt;.
91   * The regular expression should be entered on one line.
92   * </p>
93   *
94   * <p>
95   * <b>Note:</b> To search for parentheses () in a regular expression
96   * you must escape them like \(\). This is required by the regexp engine,
97   * otherwise it will think they are special instruction characters.
98   * </p>
99   *
100  * <p>
101  * <b>Note:</b> To search for things that mean something in XML, like
102  * &lt; you need to escape them like &amp;lt;. This is required so the
103  * XML parser does not act on them, but instead passes the correct
104  * character to the regexp engine.
105  * </p>
106  * <ul>
107  * <li>
108  * Property {@code duplicateLimit} - Control whether to check for duplicates
109  * of a required pattern, any negative value means no checking for duplicates,
110  * any positive value is used as the maximum number of allowed duplicates,
111  * if the limit is exceeded violations will be logged.
112  * Type is {@code int}.
113  * Default value is {@code 0}.
114  * </li>
115  * <li>
116  * Property {@code errorLimit} - Specify the maximum number of violations before
117  * the check will abort.
118  * Type is {@code int}.
119  * Default value is {@code 100}.
120  * </li>
121  * <li>
122  * Property {@code format} - Specify the pattern to match against.
123  * Type is {@code java.util.regex.Pattern}.
124  * Default value is {@code "^$"}.
125  * </li>
126  * <li>
127  * Property {@code ignoreComments} - Control whether to ignore matches found within comments.
128  * Type is {@code boolean}.
129  * Default value is {@code false}.
130  * </li>
131  * <li>
132  * Property {@code illegalPattern} - Control whether the pattern is required or illegal.
133  * Type is {@code boolean}.
134  * Default value is {@code false}.
135  * </li>
136  * <li>
137  * Property {@code message} - Specify message which is used to notify about
138  * violations, if empty then the default (hard-coded) message is used.
139  * Type is {@code java.lang.String}.
140  * Default value is {@code null}.
141  * </li>
142  * </ul>
143  *
144  * <p>
145  * Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker}
146  * </p>
147  *
148  * <p>
149  * Violation Message Keys:
150  * </p>
151  * <ul>
152  * <li>
153  * {@code duplicate.regexp}
154  * </li>
155  * <li>
156  * {@code illegal.regexp}
157  * </li>
158  * <li>
159  * {@code required.regexp}
160  * </li>
161  * </ul>
162  *
163  * @since 4.0
164  */
165 @FileStatefulCheck
166 public class RegexpCheck extends AbstractCheck {
167 
168     /**
169      * A key is pointing to the warning message text in "messages.properties"
170      * file.
171      */
172     public static final String MSG_ILLEGAL_REGEXP = "illegal.regexp";
173 
174     /**
175      * A key is pointing to the warning message text in "messages.properties"
176      * file.
177      */
178     public static final String MSG_REQUIRED_REGEXP = "required.regexp";
179 
180     /**
181      * A key is pointing to the warning message text in "messages.properties"
182      * file.
183      */
184     public static final String MSG_DUPLICATE_REGEXP = "duplicate.regexp";
185 
186     /** Default duplicate limit. */
187     private static final int DEFAULT_DUPLICATE_LIMIT = -1;
188 
189     /** Default error report limit. */
190     private static final int DEFAULT_ERROR_LIMIT = 100;
191 
192     /** Error count exceeded message. */
193     private static final String ERROR_LIMIT_EXCEEDED_MESSAGE =
194         "The error limit has been exceeded, "
195         + "the check is aborting, there may be more unreported errors.";
196 
197     /**
198      * Specify message which is used to notify about violations,
199      * if empty then the default (hard-coded) message is used.
200      */
201     private String message;
202 
203     /** Control whether to ignore matches found within comments. */
204     private boolean ignoreComments;
205 
206     /** Control whether the pattern is required or illegal. */
207     private boolean illegalPattern;
208 
209     /** Specify the maximum number of violations before the check will abort. */
210     private int errorLimit = DEFAULT_ERROR_LIMIT;
211 
212     /**
213      * Control whether to check for duplicates of a required pattern,
214      * any negative value means no checking for duplicates,
215      * any positive value is used as the maximum number of allowed duplicates,
216      * if the limit is exceeded violations will be logged.
217      */
218     private int duplicateLimit;
219 
220     /** Boolean to say if we should check for duplicates. */
221     private boolean checkForDuplicates;
222 
223     /** Specify the pattern to match against. */
224     private Pattern format = Pattern.compile("^$", Pattern.MULTILINE);
225 
226     /**
227      * Setter to specify message which is used to notify about violations,
228      * if empty then the default (hard-coded) message is used.
229      *
230      * @param message custom message which should be used in report.
231      * @since 4.0
232      */
233     public void setMessage(String message) {
234         this.message = message;
235     }
236 
237     /**
238      * Setter to control whether to ignore matches found within comments.
239      *
240      * @param ignoreComments True if comments should be ignored.
241      * @since 4.0
242      */
243     public void setIgnoreComments(boolean ignoreComments) {
244         this.ignoreComments = ignoreComments;
245     }
246 
247     /**
248      * Setter to control whether the pattern is required or illegal.
249      *
250      * @param illegalPattern True if pattern is not allowed.
251      * @since 4.0
252      */
253     public void setIllegalPattern(boolean illegalPattern) {
254         this.illegalPattern = illegalPattern;
255     }
256 
257     /**
258      * Setter to specify the maximum number of violations before the check will abort.
259      *
260      * @param errorLimit the number of errors to report.
261      * @since 4.0
262      */
263     public void setErrorLimit(int errorLimit) {
264         this.errorLimit = errorLimit;
265     }
266 
267     /**
268      * Setter to control whether to check for duplicates of a required pattern,
269      * any negative value means no checking for duplicates,
270      * any positive value is used as the maximum number of allowed duplicates,
271      * if the limit is exceeded violations will be logged.
272      *
273      * @param duplicateLimit negative values mean no duplicate checking,
274      *     any positive value is used as the limit.
275      * @since 4.0
276      */
277     public void setDuplicateLimit(int duplicateLimit) {
278         this.duplicateLimit = duplicateLimit;
279         checkForDuplicates = duplicateLimit > DEFAULT_DUPLICATE_LIMIT;
280     }
281 
282     /**
283      * Setter to specify the pattern to match against.
284      *
285      * @param pattern the new pattern
286      * @since 4.0
287      */
288     public final void setFormat(Pattern pattern) {
289         format = CommonUtil.createPattern(pattern.pattern(), Pattern.MULTILINE);
290     }
291 
292     @Override
293     public int[] getDefaultTokens() {
294         return getRequiredTokens();
295     }
296 
297     @Override
298     public int[] getAcceptableTokens() {
299         return getRequiredTokens();
300     }
301 
302     @Override
303     public int[] getRequiredTokens() {
304         return CommonUtil.EMPTY_INT_ARRAY;
305     }
306 
307     @Override
308     public void beginTree(DetailAST rootAST) {
309         processRegexpMatches();
310     }
311 
312     /**
313      * Processes the regexp matches and logs the number of errors in the file.
314      *
315      */
316     // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
317     @SuppressWarnings("deprecation")
318     private void processRegexpMatches() {
319         final Matcher matcher = format.matcher(getFileContents().getText().getFullText());
320         int errorCount = 0;
321         int matchCount = 0;
322         final FileText text = getFileContents().getText();
323         while (errorCount < errorLimit && matcher.find()) {
324             final LineColumn start = text.lineColumn(matcher.start());
325             final int startLine = start.getLine();
326 
327             final boolean ignore = isIgnore(startLine, text, start, matcher);
328             if (!ignore) {
329                 matchCount++;
330                 if (illegalPattern || checkForDuplicates
331                         && matchCount - 1 > duplicateLimit) {
332                     errorCount++;
333                     logMessage(startLine, errorCount);
334                 }
335             }
336         }
337         if (!illegalPattern && matchCount == 0) {
338             final String msg = getMessage(errorCount);
339             log(1, MSG_REQUIRED_REGEXP, msg);
340         }
341     }
342 
343     /**
344      * Detect ignore situation.
345      *
346      * @param startLine position of line
347      * @param text file text
348      * @param start line column
349      * @param matcher The matcher
350      * @return true is that need to be ignored
351      */
352     // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
353     @SuppressWarnings("deprecation")
354     private boolean isIgnore(int startLine, FileText text, LineColumn start, Matcher matcher) {
355         final LineColumn end;
356         if (matcher.end() == 0) {
357             end = text.lineColumn(0);
358         }
359         else {
360             end = text.lineColumn(matcher.end() - 1);
361         }
362         boolean ignore = false;
363         if (ignoreComments) {
364             final FileContents theFileContents = getFileContents();
365             final int startColumn = start.getColumn();
366             final int endLine = end.getLine();
367             final int endColumn = end.getColumn();
368             ignore = theFileContents.hasIntersectionWithComment(startLine,
369                 startColumn, endLine, endColumn);
370         }
371         return ignore;
372     }
373 
374     /**
375      * Displays the right message.
376      *
377      * @param lineNumber the line number the message relates to.
378      * @param errorCount number of errors in the file.
379      */
380     private void logMessage(int lineNumber, int errorCount) {
381         final String msg = getMessage(errorCount);
382 
383         if (illegalPattern) {
384             log(lineNumber, MSG_ILLEGAL_REGEXP, msg);
385         }
386         else {
387             log(lineNumber, MSG_DUPLICATE_REGEXP, msg);
388         }
389     }
390 
391     /**
392      * Provide right message.
393      *
394      * @param errorCount number of errors in the file.
395      * @return message for violation.
396      */
397     private String getMessage(int errorCount) {
398         String msg;
399 
400         if (message == null || message.isEmpty()) {
401             msg = format.pattern();
402         }
403         else {
404             msg = message;
405         }
406 
407         if (errorCount >= errorLimit) {
408             msg = ERROR_LIMIT_EXCEEDED_MESSAGE + msg;
409         }
410 
411         return msg;
412     }
413 }