001///////////////////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
003// Copyright (C) 2001-2024 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018///////////////////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.regexp;
021
022import java.util.regex.Matcher;
023import java.util.regex.Pattern;
024
025import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
026import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
027import com.puppycrawl.tools.checkstyle.api.DetailAST;
028import com.puppycrawl.tools.checkstyle.api.FileContents;
029import com.puppycrawl.tools.checkstyle.api.FileText;
030import com.puppycrawl.tools.checkstyle.api.LineColumn;
031import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
032
033/**
034 * <p>
035 * Checks that a specified pattern exists, exists less than
036 * a set number of times, or does not exist in the file.
037 * </p>
038 * <p>
039 * This check combines all the functionality provided by
040 * <a href="https://checkstyle.org/checks/header/regexpheader.html#RegexpHeader">RegexpHeader</a>
041 * except supplying the regular expression from a file.
042 * </p>
043 * <p>
044 * It differs from them in that it works in multiline mode. Its regular expression
045 * can span multiple lines and it checks this against the whole file at once.
046 * The others work in single-line mode. Their single or multiple regular expressions
047 * can only span one line. They check each of these against each line in the file in turn.
048 * </p>
049 * <p>
050 * <b>Note:</b> Because of the different mode of operation there may be some
051 * changes in the regular expressions used to achieve a particular end.
052 * </p>
053 * <p>
054 * In multiline mode...
055 * </p>
056 * <ul>
057 * <li>
058 * {@code ^} means the beginning of a line, as opposed to beginning of the input.
059 * </li>
060 * <li>
061 * For beginning of the input use {@code \A}.
062 * </li>
063 * <li>
064 * {@code $} means the end of a line, as opposed to the end of the input.
065 * </li>
066 * <li>
067 * For end of input use {@code \Z}.
068 * </li>
069 * <li>
070 * Each line in the file is terminated with a line feed character.
071 * </li>
072 * </ul>
073 * <p>
074 * <b>Note:</b> Not all regular expression engines are created equal.
075 * Some provide extra functions that others do not and some elements
076 * of the syntax may vary. This check makes use of the
077 * <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/package-summary.html">
078 * java.util.regex package</a>; please check its documentation for details
079 * of how to construct a regular expression to achieve a particular goal.
080 * </p>
081 * <p>
082 * <b>Note:</b> When entering a regular expression as a parameter in
083 * the XML config file you must also take into account the XML rules. e.g.
084 * if you want to match a &lt; symbol you need to enter &amp;lt;.
085 * The regular expression should be entered on one line.
086 * </p>
087 * <ul>
088 * <li>
089 * Property {@code duplicateLimit} - Control whether to check for duplicates
090 * of a required pattern, any negative value means no checking for duplicates,
091 * any positive value is used as the maximum number of allowed duplicates,
092 * if the limit is exceeded violations will be logged.
093 * Type is {@code int}.
094 * Default value is {@code 0}.
095 * </li>
096 * <li>
097 * Property {@code errorLimit} - Specify the maximum number of violations before
098 * the check will abort.
099 * Type is {@code int}.
100 * Default value is {@code 100}.
101 * </li>
102 * <li>
103 * Property {@code format} - Specify the pattern to match against.
104 * Type is {@code java.util.regex.Pattern}.
105 * Default value is {@code "^$"}.
106 * </li>
107 * <li>
108 * Property {@code ignoreComments} - Control whether to ignore matches found within comments.
109 * Type is {@code boolean}.
110 * Default value is {@code false}.
111 * </li>
112 * <li>
113 * Property {@code illegalPattern} - Control whether the pattern is required or illegal.
114 * Type is {@code boolean}.
115 * Default value is {@code false}.
116 * </li>
117 * <li>
118 * Property {@code message} - Specify message which is used to notify about
119 * violations, if empty then the default (hard-coded) message is used.
120 * Type is {@code java.lang.String}.
121 * Default value is {@code null}.
122 * </li>
123 * </ul>
124 * <p>
125 * Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker}
126 * </p>
127 * <p>
128 * Violation Message Keys:
129 * </p>
130 * <ul>
131 * <li>
132 * {@code duplicate.regexp}
133 * </li>
134 * <li>
135 * {@code illegal.regexp}
136 * </li>
137 * <li>
138 * {@code required.regexp}
139 * </li>
140 * </ul>
141 *
142 * @since 4.0
143 */
144@FileStatefulCheck
145public class RegexpCheck extends AbstractCheck {
146
147    /**
148     * A key is pointing to the warning message text in "messages.properties"
149     * file.
150     */
151    public static final String MSG_ILLEGAL_REGEXP = "illegal.regexp";
152
153    /**
154     * A key is pointing to the warning message text in "messages.properties"
155     * file.
156     */
157    public static final String MSG_REQUIRED_REGEXP = "required.regexp";
158
159    /**
160     * A key is pointing to the warning message text in "messages.properties"
161     * file.
162     */
163    public static final String MSG_DUPLICATE_REGEXP = "duplicate.regexp";
164
165    /** Default duplicate limit. */
166    private static final int DEFAULT_DUPLICATE_LIMIT = -1;
167
168    /** Default error report limit. */
169    private static final int DEFAULT_ERROR_LIMIT = 100;
170
171    /** Error count exceeded message. */
172    private static final String ERROR_LIMIT_EXCEEDED_MESSAGE =
173        "The error limit has been exceeded, "
174        + "the check is aborting, there may be more unreported errors.";
175
176    /**
177     * Specify message which is used to notify about violations,
178     * if empty then the default (hard-coded) message is used.
179     */
180    private String message;
181
182    /** Control whether to ignore matches found within comments. */
183    private boolean ignoreComments;
184
185    /** Control whether the pattern is required or illegal. */
186    private boolean illegalPattern;
187
188    /** Specify the maximum number of violations before the check will abort. */
189    private int errorLimit = DEFAULT_ERROR_LIMIT;
190
191    /**
192     * Control whether to check for duplicates of a required pattern,
193     * any negative value means no checking for duplicates,
194     * any positive value is used as the maximum number of allowed duplicates,
195     * if the limit is exceeded violations will be logged.
196     */
197    private int duplicateLimit;
198
199    /** Boolean to say if we should check for duplicates. */
200    private boolean checkForDuplicates;
201
202    /** Tracks number of matches made. */
203    private int matchCount;
204
205    /** Tracks number of errors. */
206    private int errorCount;
207
208    /** Specify the pattern to match against. */
209    private Pattern format = Pattern.compile("^$", Pattern.MULTILINE);
210
211    /** The matcher. */
212    private Matcher matcher;
213
214    /**
215     * Setter to specify message which is used to notify about violations,
216     * if empty then the default (hard-coded) message is used.
217     *
218     * @param message custom message which should be used in report.
219     * @since 4.0
220     */
221    public void setMessage(String message) {
222        this.message = message;
223    }
224
225    /**
226     * Setter to control whether to ignore matches found within comments.
227     *
228     * @param ignoreComments True if comments should be ignored.
229     * @since 4.0
230     */
231    public void setIgnoreComments(boolean ignoreComments) {
232        this.ignoreComments = ignoreComments;
233    }
234
235    /**
236     * Setter to control whether the pattern is required or illegal.
237     *
238     * @param illegalPattern True if pattern is not allowed.
239     * @since 4.0
240     */
241    public void setIllegalPattern(boolean illegalPattern) {
242        this.illegalPattern = illegalPattern;
243    }
244
245    /**
246     * Setter to specify the maximum number of violations before the check will abort.
247     *
248     * @param errorLimit the number of errors to report.
249     * @since 4.0
250     */
251    public void setErrorLimit(int errorLimit) {
252        this.errorLimit = errorLimit;
253    }
254
255    /**
256     * Setter to control whether to check for duplicates of a required pattern,
257     * any negative value means no checking for duplicates,
258     * any positive value is used as the maximum number of allowed duplicates,
259     * if the limit is exceeded violations will be logged.
260     *
261     * @param duplicateLimit negative values mean no duplicate checking,
262     *     any positive value is used as the limit.
263     * @since 4.0
264     */
265    public void setDuplicateLimit(int duplicateLimit) {
266        this.duplicateLimit = duplicateLimit;
267        checkForDuplicates = duplicateLimit > DEFAULT_DUPLICATE_LIMIT;
268    }
269
270    /**
271     * Setter to specify the pattern to match against.
272     *
273     * @param pattern the new pattern
274     * @since 4.0
275     */
276    public final void setFormat(Pattern pattern) {
277        format = CommonUtil.createPattern(pattern.pattern(), Pattern.MULTILINE);
278    }
279
280    @Override
281    public int[] getDefaultTokens() {
282        return getRequiredTokens();
283    }
284
285    @Override
286    public int[] getAcceptableTokens() {
287        return getRequiredTokens();
288    }
289
290    @Override
291    public int[] getRequiredTokens() {
292        return CommonUtil.EMPTY_INT_ARRAY;
293    }
294
295    // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
296    @SuppressWarnings("deprecation")
297    @Override
298    public void beginTree(DetailAST rootAST) {
299        matcher = format.matcher(getFileContents().getText().getFullText());
300        matchCount = 0;
301        errorCount = 0;
302        findMatch();
303    }
304
305    /**
306     * Recursive method that finds the matches.
307     *
308     * @noinspection TailRecursion
309     * @noinspectionreason TailRecursion - until issue #14814
310     */
311    // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
312    @SuppressWarnings("deprecation")
313    private void findMatch() {
314        final boolean foundMatch = matcher.find();
315        if (foundMatch) {
316            final FileText text = getFileContents().getText();
317            final LineColumn start = text.lineColumn(matcher.start());
318            final int startLine = start.getLine();
319
320            final boolean ignore = isIgnore(startLine, text, start);
321
322            if (!ignore) {
323                matchCount++;
324                if (illegalPattern || checkForDuplicates
325                        && matchCount - 1 > duplicateLimit) {
326                    errorCount++;
327                    logMessage(startLine);
328                }
329            }
330            if (canContinueValidation(ignore)) {
331                findMatch();
332            }
333        }
334        else if (!illegalPattern && matchCount == 0) {
335            final String msg = getMessage();
336            log(1, MSG_REQUIRED_REGEXP, msg);
337        }
338    }
339
340    /**
341     * Check if we can stop validation.
342     *
343     * @param ignore flag
344     * @return true is we can continue
345     */
346    private boolean canContinueValidation(boolean ignore) {
347        return errorCount <= errorLimit - 1
348                && (ignore || illegalPattern || checkForDuplicates);
349    }
350
351    /**
352     * Detect ignore situation.
353     *
354     * @param startLine position of line
355     * @param text file text
356     * @param start line column
357     * @return true is that need to be ignored
358     */
359    // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
360    @SuppressWarnings("deprecation")
361    private boolean isIgnore(int startLine, FileText text, LineColumn start) {
362        final LineColumn end;
363        if (matcher.end() == 0) {
364            end = text.lineColumn(0);
365        }
366        else {
367            end = text.lineColumn(matcher.end() - 1);
368        }
369        boolean ignore = false;
370        if (ignoreComments) {
371            final FileContents theFileContents = getFileContents();
372            final int startColumn = start.getColumn();
373            final int endLine = end.getLine();
374            final int endColumn = end.getColumn();
375            ignore = theFileContents.hasIntersectionWithComment(startLine,
376                startColumn, endLine, endColumn);
377        }
378        return ignore;
379    }
380
381    /**
382     * Displays the right message.
383     *
384     * @param lineNumber the line number the message relates to.
385     */
386    private void logMessage(int lineNumber) {
387        final String msg = getMessage();
388
389        if (illegalPattern) {
390            log(lineNumber, MSG_ILLEGAL_REGEXP, msg);
391        }
392        else {
393            log(lineNumber, MSG_DUPLICATE_REGEXP, msg);
394        }
395    }
396
397    /**
398     * Provide right message.
399     *
400     * @return message for violation.
401     */
402    private String getMessage() {
403        String msg;
404
405        if (message == null || message.isEmpty()) {
406            msg = format.pattern();
407        }
408        else {
409            msg = message;
410        }
411
412        if (errorCount >= errorLimit) {
413            msg = ERROR_LIMIT_EXCEEDED_MESSAGE + msg;
414        }
415
416        return msg;
417    }
418
419}