001///////////////////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
003// Copyright (C) 2001-2025 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018///////////////////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.regexp;
021
022import java.util.regex.Matcher;
023import java.util.regex.Pattern;
024
025import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
026import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
027import com.puppycrawl.tools.checkstyle.api.DetailAST;
028import com.puppycrawl.tools.checkstyle.api.FileContents;
029import com.puppycrawl.tools.checkstyle.api.FileText;
030import com.puppycrawl.tools.checkstyle.api.LineColumn;
031import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
032
033/**
034 * <div>
035 * Checks that a specified pattern exists, exists less than
036 * a set number of times, or does not exist in the file.
037 * </div>
038 *
039 * <p>
040 * This check combines all the functionality provided by
041 * <a href="https://checkstyle.org/checks/header/regexpheader.html#RegexpHeader">RegexpHeader</a>
042 * except supplying the regular expression from a file.
043 * </p>
044 *
045 * <p>
 * It differs from the other regexp checks in that it works in multiline mode.
 * Its regular expression can span multiple lines and it checks this against
 * the whole file at once.
048 * The others work in single-line mode. Their single or multiple regular expressions
049 * can only span one line. They check each of these against each line in the file in turn.
050 * </p>
051 *
052 * <p>
053 * <b>Note:</b> Because of the different mode of operation there may be some
054 * changes in the regular expressions used to achieve a particular end.
055 * </p>
056 *
057 * <p>
058 * In multiline mode...
059 * </p>
060 * <ul>
061 * <li>
062 * {@code ^} means the beginning of a line, as opposed to beginning of the input.
063 * </li>
064 * <li>
065 * For beginning of the input use {@code \A}.
066 * </li>
067 * <li>
068 * {@code $} means the end of a line, as opposed to the end of the input.
069 * </li>
070 * <li>
071 * For end of input use {@code \Z}.
072 * </li>
073 * <li>
074 * Each line in the file is terminated with a line feed character.
075 * </li>
076 * </ul>
077 *
078 * <p>
079 * <b>Note:</b> Not all regular expression engines are created equal.
080 * Some provide extra functions that others do not and some elements
081 * of the syntax may vary. This check makes use of the
082 * <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/package-summary.html">
083 * java.util.regex package</a>; please check its documentation for details
084 * of how to construct a regular expression to achieve a particular goal.
085 * </p>
086 *
087 * <p>
088 * <b>Note:</b> When entering a regular expression as a parameter in
089 * the XML config file you must also take into account the XML rules. e.g.
090 * if you want to match a &lt; symbol you need to enter &amp;lt;.
091 * The regular expression should be entered on one line.
092 * </p>
093 *
094 * <p>
095 * <b>Note:</b> To search for parentheses () in a regular expression
096 * you must escape them like \(\). This is required by the regexp engine,
097 * otherwise it will think they are special instruction characters.
098 * </p>
099 *
100 * <p>
101 * <b>Note:</b> To search for things that mean something in XML, like
102 * &lt; you need to escape them like &amp;lt;. This is required so the
103 * XML parser does not act on them, but instead passes the correct
104 * character to the regexp engine.
105 * </p>
106 * <ul>
107 * <li>
108 * Property {@code duplicateLimit} - Control whether to check for duplicates
109 * of a required pattern, any negative value means no checking for duplicates,
110 * any positive value is used as the maximum number of allowed duplicates,
111 * if the limit is exceeded violations will be logged.
112 * Type is {@code int}.
113 * Default value is {@code 0}.
114 * </li>
115 * <li>
116 * Property {@code errorLimit} - Specify the maximum number of violations before
117 * the check will abort.
118 * Type is {@code int}.
119 * Default value is {@code 100}.
120 * </li>
121 * <li>
122 * Property {@code format} - Specify the pattern to match against.
123 * Type is {@code java.util.regex.Pattern}.
124 * Default value is {@code "^$"}.
125 * </li>
126 * <li>
127 * Property {@code ignoreComments} - Control whether to ignore matches found within comments.
128 * Type is {@code boolean}.
129 * Default value is {@code false}.
130 * </li>
131 * <li>
132 * Property {@code illegalPattern} - Control whether the pattern is required or illegal.
133 * Type is {@code boolean}.
134 * Default value is {@code false}.
135 * </li>
136 * <li>
137 * Property {@code message} - Specify message which is used to notify about
138 * violations, if empty then the default (hard-coded) message is used.
139 * Type is {@code java.lang.String}.
140 * Default value is {@code null}.
141 * </li>
142 * </ul>
143 *
144 * <p>
145 * Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker}
146 * </p>
147 *
148 * <p>
149 * Violation Message Keys:
150 * </p>
151 * <ul>
152 * <li>
153 * {@code duplicate.regexp}
154 * </li>
155 * <li>
156 * {@code illegal.regexp}
157 * </li>
158 * <li>
159 * {@code required.regexp}
160 * </li>
161 * </ul>
162 *
163 * @since 4.0
164 */
165@FileStatefulCheck
166public class RegexpCheck extends AbstractCheck {
167
168    /**
169     * A key is pointing to the warning message text in "messages.properties"
170     * file.
171     */
172    public static final String MSG_ILLEGAL_REGEXP = "illegal.regexp";
173
174    /**
175     * A key is pointing to the warning message text in "messages.properties"
176     * file.
177     */
178    public static final String MSG_REQUIRED_REGEXP = "required.regexp";
179
180    /**
181     * A key is pointing to the warning message text in "messages.properties"
182     * file.
183     */
184    public static final String MSG_DUPLICATE_REGEXP = "duplicate.regexp";
185
186    /** Default duplicate limit. */
187    private static final int DEFAULT_DUPLICATE_LIMIT = -1;
188
189    /** Default error report limit. */
190    private static final int DEFAULT_ERROR_LIMIT = 100;
191
192    /** Error count exceeded message. */
193    private static final String ERROR_LIMIT_EXCEEDED_MESSAGE =
194        "The error limit has been exceeded, "
195        + "the check is aborting, there may be more unreported errors.";
196
197    /**
198     * Specify message which is used to notify about violations,
199     * if empty then the default (hard-coded) message is used.
200     */
201    private String message;
202
203    /** Control whether to ignore matches found within comments. */
204    private boolean ignoreComments;
205
206    /** Control whether the pattern is required or illegal. */
207    private boolean illegalPattern;
208
209    /** Specify the maximum number of violations before the check will abort. */
210    private int errorLimit = DEFAULT_ERROR_LIMIT;
211
212    /**
213     * Control whether to check for duplicates of a required pattern,
214     * any negative value means no checking for duplicates,
215     * any positive value is used as the maximum number of allowed duplicates,
216     * if the limit is exceeded violations will be logged.
217     */
218    private int duplicateLimit;
219
220    /** Boolean to say if we should check for duplicates. */
221    private boolean checkForDuplicates;
222
223    /** Specify the pattern to match against. */
224    private Pattern format = Pattern.compile("^$", Pattern.MULTILINE);
225
226    /**
227     * Setter to specify message which is used to notify about violations,
228     * if empty then the default (hard-coded) message is used.
229     *
230     * @param message custom message which should be used in report.
231     * @since 4.0
232     */
233    public void setMessage(String message) {
234        this.message = message;
235    }
236
237    /**
238     * Setter to control whether to ignore matches found within comments.
239     *
240     * @param ignoreComments True if comments should be ignored.
241     * @since 4.0
242     */
243    public void setIgnoreComments(boolean ignoreComments) {
244        this.ignoreComments = ignoreComments;
245    }
246
247    /**
248     * Setter to control whether the pattern is required or illegal.
249     *
250     * @param illegalPattern True if pattern is not allowed.
251     * @since 4.0
252     */
253    public void setIllegalPattern(boolean illegalPattern) {
254        this.illegalPattern = illegalPattern;
255    }
256
257    /**
258     * Setter to specify the maximum number of violations before the check will abort.
259     *
260     * @param errorLimit the number of errors to report.
261     * @since 4.0
262     */
263    public void setErrorLimit(int errorLimit) {
264        this.errorLimit = errorLimit;
265    }
266
267    /**
268     * Setter to control whether to check for duplicates of a required pattern,
269     * any negative value means no checking for duplicates,
270     * any positive value is used as the maximum number of allowed duplicates,
271     * if the limit is exceeded violations will be logged.
272     *
273     * @param duplicateLimit negative values mean no duplicate checking,
274     *     any positive value is used as the limit.
275     * @since 4.0
276     */
277    public void setDuplicateLimit(int duplicateLimit) {
278        this.duplicateLimit = duplicateLimit;
279        checkForDuplicates = duplicateLimit > DEFAULT_DUPLICATE_LIMIT;
280    }
281
282    /**
283     * Setter to specify the pattern to match against.
284     *
285     * @param pattern the new pattern
286     * @since 4.0
287     */
288    public final void setFormat(Pattern pattern) {
289        format = CommonUtil.createPattern(pattern.pattern(), Pattern.MULTILINE);
290    }
291
292    @Override
293    public int[] getDefaultTokens() {
294        return getRequiredTokens();
295    }
296
297    @Override
298    public int[] getAcceptableTokens() {
299        return getRequiredTokens();
300    }
301
302    @Override
303    public int[] getRequiredTokens() {
304        return CommonUtil.EMPTY_INT_ARRAY;
305    }
306
307    @Override
308    public void beginTree(DetailAST rootAST) {
309        processRegexpMatches();
310    }
311
312    /**
313     * Processes the regexp matches and logs the number of errors in the file.
314     *
315     */
316    // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
317    @SuppressWarnings("deprecation")
318    private void processRegexpMatches() {
319        final Matcher matcher = format.matcher(getFileContents().getText().getFullText());
320        int errorCount = 0;
321        int matchCount = 0;
322        final FileText text = getFileContents().getText();
323        while (errorCount < errorLimit && matcher.find()) {
324            final LineColumn start = text.lineColumn(matcher.start());
325            final int startLine = start.getLine();
326
327            final boolean ignore = isIgnore(startLine, text, start, matcher);
328            if (!ignore) {
329                matchCount++;
330                if (illegalPattern || checkForDuplicates
331                        && matchCount - 1 > duplicateLimit) {
332                    errorCount++;
333                    logMessage(startLine, errorCount);
334                }
335            }
336        }
337        if (!illegalPattern && matchCount == 0) {
338            final String msg = getMessage(errorCount);
339            log(1, MSG_REQUIRED_REGEXP, msg);
340        }
341    }
342
343    /**
344     * Detect ignore situation.
345     *
346     * @param startLine position of line
347     * @param text file text
348     * @param start line column
349     * @param matcher The matcher
350     * @return true is that need to be ignored
351     */
352    // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
353    @SuppressWarnings("deprecation")
354    private boolean isIgnore(int startLine, FileText text, LineColumn start, Matcher matcher) {
355        final LineColumn end;
356        if (matcher.end() == 0) {
357            end = text.lineColumn(0);
358        }
359        else {
360            end = text.lineColumn(matcher.end() - 1);
361        }
362        boolean ignore = false;
363        if (ignoreComments) {
364            final FileContents theFileContents = getFileContents();
365            final int startColumn = start.getColumn();
366            final int endLine = end.getLine();
367            final int endColumn = end.getColumn();
368            ignore = theFileContents.hasIntersectionWithComment(startLine,
369                startColumn, endLine, endColumn);
370        }
371        return ignore;
372    }
373
374    /**
375     * Displays the right message.
376     *
377     * @param lineNumber the line number the message relates to.
378     * @param errorCount number of errors in the file.
379     */
380    private void logMessage(int lineNumber, int errorCount) {
381        final String msg = getMessage(errorCount);
382
383        if (illegalPattern) {
384            log(lineNumber, MSG_ILLEGAL_REGEXP, msg);
385        }
386        else {
387            log(lineNumber, MSG_DUPLICATE_REGEXP, msg);
388        }
389    }
390
391    /**
392     * Provide right message.
393     *
394     * @param errorCount number of errors in the file.
395     * @return message for violation.
396     */
397    private String getMessage(int errorCount) {
398        String msg;
399
400        if (message == null || message.isEmpty()) {
401            msg = format.pattern();
402        }
403        else {
404            msg = message;
405        }
406
407        if (errorCount >= errorLimit) {
408            msg = ERROR_LIMIT_EXCEEDED_MESSAGE + msg;
409        }
410
411        return msg;
412    }
413}