View Javadoc
1   ///////////////////////////////////////////////////////////////////////////////////////////////
2   // checkstyle: Checks Java source code and other text files for adherence to a set of rules.
3   // Copyright (C) 2001-2024 the original author or authors.
4   //
5   // This library is free software; you can redistribute it and/or
6   // modify it under the terms of the GNU Lesser General Public
7   // License as published by the Free Software Foundation; either
8   // version 2.1 of the License, or (at your option) any later version.
9   //
10  // This library is distributed in the hope that it will be useful,
11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  // Lesser General Public License for more details.
14  //
15  // You should have received a copy of the GNU Lesser General Public
16  // License along with this library; if not, write to the Free Software
17  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  ///////////////////////////////////////////////////////////////////////////////////////////////
19  
20  package com.puppycrawl.tools.checkstyle.checks.regexp;
21  
22  import java.util.regex.Matcher;
23  import java.util.regex.Pattern;
24  
25  import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
26  import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
27  import com.puppycrawl.tools.checkstyle.api.DetailAST;
28  import com.puppycrawl.tools.checkstyle.api.FileContents;
29  import com.puppycrawl.tools.checkstyle.api.FileText;
30  import com.puppycrawl.tools.checkstyle.api.LineColumn;
31  import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
32  
33  /**
34   * <p>
35   * Checks that a specified pattern exists, exists less than
36   * a set number of times, or does not exist in the file.
37   * </p>
38   * <p>
39   * This check combines all the functionality provided by
40   * <a href="https://checkstyle.org/checks/header/regexpheader.html#RegexpHeader">RegexpHeader</a>
41   * except supplying the regular expression from a file.
42   * </p>
43   * <p>
44   * It differs from them in that it works in multiline mode. Its regular expression
45   * can span multiple lines and it checks this against the whole file at once.
46   * The others work in single-line mode. Their single or multiple regular expressions
47   * can only span one line. They check each of these against each line in the file in turn.
48   * </p>
49   * <p>
50   * <b>Note:</b> Because of the different mode of operation there may be some
51   * changes in the regular expressions used to achieve a particular end.
52   * </p>
53   * <p>
54   * In multiline mode...
55   * </p>
56   * <ul>
57   * <li>
58   * {@code ^} means the beginning of a line, as opposed to beginning of the input.
59   * </li>
60   * <li>
61   * For beginning of the input use {@code \A}.
62   * </li>
63   * <li>
64   * {@code $} means the end of a line, as opposed to the end of the input.
65   * </li>
66   * <li>
67   * For end of input use {@code \Z}.
68   * </li>
69   * <li>
70   * Each line in the file is terminated with a line feed character.
71   * </li>
72   * </ul>
73   * <p>
74   * <b>Note:</b> Not all regular expression engines are created equal.
75   * Some provide extra functions that others do not and some elements
76   * of the syntax may vary. This check makes use of the
77   * <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/package-summary.html">
78   * java.util.regex package</a>; please check its documentation for details
79   * of how to construct a regular expression to achieve a particular goal.
80   * </p>
81   * <p>
82   * <b>Note:</b> When entering a regular expression as a parameter in
83   * the XML config file you must also take into account the XML rules. e.g.
84   * if you want to match a &lt; symbol you need to enter &amp;lt;.
85   * The regular expression should be entered on one line.
86   * </p>
87   * <ul>
88   * <li>
89   * Property {@code duplicateLimit} - Control whether to check for duplicates
90   * of a required pattern, any negative value means no checking for duplicates,
91   * any positive value is used as the maximum number of allowed duplicates,
92   * if the limit is exceeded violations will be logged.
93   * Type is {@code int}.
94   * Default value is {@code 0}.
95   * </li>
96   * <li>
97   * Property {@code errorLimit} - Specify the maximum number of violations before
98   * the check will abort.
99   * Type is {@code int}.
100  * Default value is {@code 100}.
101  * </li>
102  * <li>
103  * Property {@code format} - Specify the pattern to match against.
104  * Type is {@code java.util.regex.Pattern}.
105  * Default value is {@code "^$"}.
106  * </li>
107  * <li>
108  * Property {@code ignoreComments} - Control whether to ignore matches found within comments.
109  * Type is {@code boolean}.
110  * Default value is {@code false}.
111  * </li>
112  * <li>
113  * Property {@code illegalPattern} - Control whether the pattern is required or illegal.
114  * Type is {@code boolean}.
115  * Default value is {@code false}.
116  * </li>
117  * <li>
118  * Property {@code message} - Specify message which is used to notify about
119  * violations, if empty then the default (hard-coded) message is used.
120  * Type is {@code java.lang.String}.
121  * Default value is {@code null}.
122  * </li>
123  * </ul>
124  * <p>
125  * Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker}
126  * </p>
127  * <p>
128  * Violation Message Keys:
129  * </p>
130  * <ul>
131  * <li>
132  * {@code duplicate.regexp}
133  * </li>
134  * <li>
135  * {@code illegal.regexp}
136  * </li>
137  * <li>
138  * {@code required.regexp}
139  * </li>
140  * </ul>
141  *
142  * @since 4.0
143  */
144 @FileStatefulCheck
145 public class RegexpCheck extends AbstractCheck {
146 
147     /**
148      * A key is pointing to the warning message text in "messages.properties"
149      * file.
150      */
151     public static final String MSG_ILLEGAL_REGEXP = "illegal.regexp";
152 
153     /**
154      * A key is pointing to the warning message text in "messages.properties"
155      * file.
156      */
157     public static final String MSG_REQUIRED_REGEXP = "required.regexp";
158 
159     /**
160      * A key is pointing to the warning message text in "messages.properties"
161      * file.
162      */
163     public static final String MSG_DUPLICATE_REGEXP = "duplicate.regexp";
164 
165     /** Default duplicate limit. */
166     private static final int DEFAULT_DUPLICATE_LIMIT = -1;
167 
168     /** Default error report limit. */
169     private static final int DEFAULT_ERROR_LIMIT = 100;
170 
171     /** Error count exceeded message. */
172     private static final String ERROR_LIMIT_EXCEEDED_MESSAGE =
173         "The error limit has been exceeded, "
174         + "the check is aborting, there may be more unreported errors.";
175 
176     /**
177      * Specify message which is used to notify about violations,
178      * if empty then the default (hard-coded) message is used.
179      */
180     private String message;
181 
182     /** Control whether to ignore matches found within comments. */
183     private boolean ignoreComments;
184 
185     /** Control whether the pattern is required or illegal. */
186     private boolean illegalPattern;
187 
188     /** Specify the maximum number of violations before the check will abort. */
189     private int errorLimit = DEFAULT_ERROR_LIMIT;
190 
191     /**
192      * Control whether to check for duplicates of a required pattern,
193      * any negative value means no checking for duplicates,
194      * any positive value is used as the maximum number of allowed duplicates,
195      * if the limit is exceeded violations will be logged.
196      */
197     private int duplicateLimit;
198 
199     /** Boolean to say if we should check for duplicates. */
200     private boolean checkForDuplicates;
201 
202     /** Tracks number of matches made. */
203     private int matchCount;
204 
205     /** Tracks number of errors. */
206     private int errorCount;
207 
208     /** Specify the pattern to match against. */
209     private Pattern format = Pattern.compile("^$", Pattern.MULTILINE);
210 
211     /** The matcher. */
212     private Matcher matcher;
213 
214     /**
215      * Setter to specify message which is used to notify about violations,
216      * if empty then the default (hard-coded) message is used.
217      *
218      * @param message custom message which should be used in report.
219      * @since 4.0
220      */
221     public void setMessage(String message) {
222         this.message = message;
223     }
224 
225     /**
226      * Setter to control whether to ignore matches found within comments.
227      *
228      * @param ignoreComments True if comments should be ignored.
229      * @since 4.0
230      */
231     public void setIgnoreComments(boolean ignoreComments) {
232         this.ignoreComments = ignoreComments;
233     }
234 
235     /**
236      * Setter to control whether the pattern is required or illegal.
237      *
238      * @param illegalPattern True if pattern is not allowed.
239      * @since 4.0
240      */
241     public void setIllegalPattern(boolean illegalPattern) {
242         this.illegalPattern = illegalPattern;
243     }
244 
245     /**
246      * Setter to specify the maximum number of violations before the check will abort.
247      *
248      * @param errorLimit the number of errors to report.
249      * @since 4.0
250      */
251     public void setErrorLimit(int errorLimit) {
252         this.errorLimit = errorLimit;
253     }
254 
255     /**
256      * Setter to control whether to check for duplicates of a required pattern,
257      * any negative value means no checking for duplicates,
258      * any positive value is used as the maximum number of allowed duplicates,
259      * if the limit is exceeded violations will be logged.
260      *
261      * @param duplicateLimit negative values mean no duplicate checking,
262      *     any positive value is used as the limit.
263      * @since 4.0
264      */
265     public void setDuplicateLimit(int duplicateLimit) {
266         this.duplicateLimit = duplicateLimit;
267         checkForDuplicates = duplicateLimit > DEFAULT_DUPLICATE_LIMIT;
268     }
269 
270     /**
271      * Setter to specify the pattern to match against.
272      *
273      * @param pattern the new pattern
274      * @since 4.0
275      */
276     public final void setFormat(Pattern pattern) {
277         format = CommonUtil.createPattern(pattern.pattern(), Pattern.MULTILINE);
278     }
279 
280     @Override
281     public int[] getDefaultTokens() {
282         return getRequiredTokens();
283     }
284 
285     @Override
286     public int[] getAcceptableTokens() {
287         return getRequiredTokens();
288     }
289 
290     @Override
291     public int[] getRequiredTokens() {
292         return CommonUtil.EMPTY_INT_ARRAY;
293     }
294 
295     // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
296     @SuppressWarnings("deprecation")
297     @Override
298     public void beginTree(DetailAST rootAST) {
299         matcher = format.matcher(getFileContents().getText().getFullText());
300         matchCount = 0;
301         errorCount = 0;
302         findMatch();
303     }
304 
305     /**
306      * Recursive method that finds the matches.
307      *
308      * @noinspection TailRecursion
309      * @noinspectionreason TailRecursion - until issue #14814
310      */
311     // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
312     @SuppressWarnings("deprecation")
313     private void findMatch() {
314         final boolean foundMatch = matcher.find();
315         if (foundMatch) {
316             final FileText text = getFileContents().getText();
317             final LineColumn start = text.lineColumn(matcher.start());
318             final int startLine = start.getLine();
319 
320             final boolean ignore = isIgnore(startLine, text, start);
321 
322             if (!ignore) {
323                 matchCount++;
324                 if (illegalPattern || checkForDuplicates
325                         && matchCount - 1 > duplicateLimit) {
326                     errorCount++;
327                     logMessage(startLine);
328                 }
329             }
330             if (canContinueValidation(ignore)) {
331                 findMatch();
332             }
333         }
334         else if (!illegalPattern && matchCount == 0) {
335             final String msg = getMessage();
336             log(1, MSG_REQUIRED_REGEXP, msg);
337         }
338     }
339 
340     /**
341      * Check if we can stop validation.
342      *
343      * @param ignore flag
344      * @return true is we can continue
345      */
346     private boolean canContinueValidation(boolean ignore) {
347         return errorCount <= errorLimit - 1
348                 && (ignore || illegalPattern || checkForDuplicates);
349     }
350 
351     /**
352      * Detect ignore situation.
353      *
354      * @param startLine position of line
355      * @param text file text
356      * @param start line column
357      * @return true is that need to be ignored
358      */
359     // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
360     @SuppressWarnings("deprecation")
361     private boolean isIgnore(int startLine, FileText text, LineColumn start) {
362         final LineColumn end;
363         if (matcher.end() == 0) {
364             end = text.lineColumn(0);
365         }
366         else {
367             end = text.lineColumn(matcher.end() - 1);
368         }
369         boolean ignore = false;
370         if (ignoreComments) {
371             final FileContents theFileContents = getFileContents();
372             final int startColumn = start.getColumn();
373             final int endLine = end.getLine();
374             final int endColumn = end.getColumn();
375             ignore = theFileContents.hasIntersectionWithComment(startLine,
376                 startColumn, endLine, endColumn);
377         }
378         return ignore;
379     }
380 
381     /**
382      * Displays the right message.
383      *
384      * @param lineNumber the line number the message relates to.
385      */
386     private void logMessage(int lineNumber) {
387         final String msg = getMessage();
388 
389         if (illegalPattern) {
390             log(lineNumber, MSG_ILLEGAL_REGEXP, msg);
391         }
392         else {
393             log(lineNumber, MSG_DUPLICATE_REGEXP, msg);
394         }
395     }
396 
397     /**
398      * Provide right message.
399      *
400      * @return message for violation.
401      */
402     private String getMessage() {
403         String msg;
404 
405         if (message == null || message.isEmpty()) {
406             msg = format.pattern();
407         }
408         else {
409             msg = message;
410         }
411 
412         if (errorCount >= errorLimit) {
413             msg = ERROR_LIMIT_EXCEEDED_MESSAGE + msg;
414         }
415 
416         return msg;
417     }
418 
419 }