001///////////////////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
003// Copyright (C) 2001-2024 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018///////////////////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.coding;
021
022import java.util.ArrayList;
023import java.util.BitSet;
024import java.util.HashMap;
025import java.util.List;
026import java.util.Map;
027import java.util.regex.Pattern;
028
029import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
030import com.puppycrawl.tools.checkstyle.PropertyType;
031import com.puppycrawl.tools.checkstyle.XdocsPropertyType;
032import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
033import com.puppycrawl.tools.checkstyle.api.DetailAST;
034import com.puppycrawl.tools.checkstyle.api.TokenTypes;
035import com.puppycrawl.tools.checkstyle.utils.CheckUtil;
036import com.puppycrawl.tools.checkstyle.utils.TokenUtil;
037
038/**
039 * <p>
040 * Checks for multiple occurrences of the same string literal within a single file.
041 * </p>
042 * <p>
043 * Rationale: Code duplication makes maintenance more difficult, so it can be better
044 * to replace the multiple occurrences with a constant.
045 * </p>
046 * <ul>
047 * <li>
048 * Property {@code allowedDuplicates} - Specify the maximum number of occurrences
049 * to allow without generating a warning.
050 * Type is {@code int}.
051 * Default value is {@code 1}.
052 * </li>
053 * <li>
054 * Property {@code ignoreOccurrenceContext} - Specify token type names where duplicate
055 * strings are ignored even if they don't match ignoredStringsRegexp. This allows you to
056 * exclude syntactical contexts like annotations or static initializers from the check.
057 * Type is {@code java.lang.String[]}.
058 * Validation type is {@code tokenTypesSet}.
059 * Default value is
060 * <a href="https://checkstyle.org/apidocs/com/puppycrawl/tools/checkstyle/api/TokenTypes.html#ANNOTATION">
061 * ANNOTATION</a>.
062 * </li>
063 * <li>
064 * Property {@code ignoreStringsRegexp} - Specify RegExp for ignored strings (with quotation marks).
065 * Type is {@code java.util.regex.Pattern}.
066 * Default value is {@code "^""$"}.
067 * </li>
068 * </ul>
069 * <p>
070 * Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker}
071 * </p>
072 * <p>
073 * Violation Message Keys:
074 * </p>
075 * <ul>
076 * <li>
077 * {@code multiple.string.literal}
078 * </li>
079 * </ul>
080 *
081 * @since 3.5
082 */
083@FileStatefulCheck
084public class MultipleStringLiteralsCheck extends AbstractCheck {
085
086    /**
087     * A key is pointing to the warning message text in "messages.properties"
088     * file.
089     */
090    public static final String MSG_KEY = "multiple.string.literal";
091
092    /**
093     * Compiled pattern for all system newlines.
094     */
095    private static final Pattern ALL_NEW_LINES = Pattern.compile("\\R");
096
097    /**
098     * String used to amend TEXT_BLOCK_CONTENT so that it matches STRING_LITERAL.
099     */
100    private static final String QUOTE = "\"";
101
102    /**
103     * The found strings and their tokens.
104     */
105    private final Map<String, List<DetailAST>> stringMap = new HashMap<>();
106
107    /**
108     * Specify token type names where duplicate strings are ignored even if they
109     * don't match ignoredStringsRegexp. This allows you to exclude syntactical
110     * contexts like annotations or static initializers from the check.
111     */
112    @XdocsPropertyType(PropertyType.TOKEN_ARRAY)
113    private final BitSet ignoreOccurrenceContext = new BitSet();
114
115    /**
116     * Specify the maximum number of occurrences to allow without generating a warning.
117     */
118    private int allowedDuplicates = 1;
119
120    /**
121     * Specify RegExp for ignored strings (with quotation marks).
122     */
123    private Pattern ignoreStringsRegexp;
124
125    /**
126     * Construct an instance with default values.
127     */
128    public MultipleStringLiteralsCheck() {
129        setIgnoreStringsRegexp(Pattern.compile("^\"\"$"));
130        ignoreOccurrenceContext.set(TokenTypes.ANNOTATION);
131    }
132
133    /**
134     * Setter to specify the maximum number of occurrences to allow without generating a warning.
135     *
136     * @param allowedDuplicates The maximum number of duplicates.
137     * @since 3.5
138     */
139    public void setAllowedDuplicates(int allowedDuplicates) {
140        this.allowedDuplicates = allowedDuplicates;
141    }
142
143    /**
144     * Setter to specify RegExp for ignored strings (with quotation marks).
145     *
146     * @param ignoreStringsRegexp
147     *        regular expression pattern for ignored strings
148     * @noinspection WeakerAccess
149     * @noinspectionreason WeakerAccess - we avoid 'protected' when possible
150     * @since 4.0
151     */
152    public final void setIgnoreStringsRegexp(Pattern ignoreStringsRegexp) {
153        if (ignoreStringsRegexp == null || ignoreStringsRegexp.pattern().isEmpty()) {
154            this.ignoreStringsRegexp = null;
155        }
156        else {
157            this.ignoreStringsRegexp = ignoreStringsRegexp;
158        }
159    }
160
161    /**
162     * Setter to specify token type names where duplicate strings are ignored even
163     * if they don't match ignoredStringsRegexp. This allows you to exclude
164     * syntactical contexts like annotations or static initializers from the check.
165     *
166     * @param strRep the string representation of the tokens interested in
167     * @since 4.4
168     */
169    public final void setIgnoreOccurrenceContext(String... strRep) {
170        ignoreOccurrenceContext.clear();
171        for (final String s : strRep) {
172            final int type = TokenUtil.getTokenId(s);
173            ignoreOccurrenceContext.set(type);
174        }
175    }
176
177    @Override
178    public int[] getDefaultTokens() {
179        return getRequiredTokens();
180    }
181
182    @Override
183    public int[] getAcceptableTokens() {
184        return getRequiredTokens();
185    }
186
187    @Override
188    public int[] getRequiredTokens() {
189        return new int[] {
190            TokenTypes.STRING_LITERAL,
191            TokenTypes.TEXT_BLOCK_CONTENT,
192        };
193    }
194
195    @Override
196    public void visitToken(DetailAST ast) {
197        if (!isInIgnoreOccurrenceContext(ast)) {
198            final String currentString;
199            if (ast.getType() == TokenTypes.TEXT_BLOCK_CONTENT) {
200                final String strippedString =
201                    CheckUtil.stripIndentAndInitialNewLineFromTextBlock(ast.getText());
202                // We need to add quotes here to be consistent with STRING_LITERAL text.
203                currentString = QUOTE + strippedString + QUOTE;
204            }
205            else {
206                currentString = ast.getText();
207            }
208            if (ignoreStringsRegexp == null
209                    || !ignoreStringsRegexp.matcher(currentString).find()) {
210                stringMap.computeIfAbsent(currentString, key -> new ArrayList<>()).add(ast);
211            }
212        }
213    }
214
215    /**
216     * Analyses the path from the AST root to a given AST for occurrences
217     * of the token types in {@link #ignoreOccurrenceContext}.
218     *
219     * @param ast the node from where to start searching towards the root node
220     * @return whether the path from the root node to ast contains one of the
221     *     token type in {@link #ignoreOccurrenceContext}.
222     */
223    private boolean isInIgnoreOccurrenceContext(DetailAST ast) {
224        boolean isInIgnoreOccurrenceContext = false;
225        for (DetailAST token = ast; token != null; token = token.getParent()) {
226            final int type = token.getType();
227            if (ignoreOccurrenceContext.get(type)) {
228                isInIgnoreOccurrenceContext = true;
229                break;
230            }
231        }
232        return isInIgnoreOccurrenceContext;
233    }
234
235    @Override
236    public void beginTree(DetailAST rootAST) {
237        stringMap.clear();
238    }
239
240    @Override
241    public void finishTree(DetailAST rootAST) {
242        for (Map.Entry<String, List<DetailAST>> stringListEntry : stringMap.entrySet()) {
243            final List<DetailAST> hits = stringListEntry.getValue();
244            if (hits.size() > allowedDuplicates) {
245                final DetailAST firstFinding = hits.get(0);
246                final String recurringString =
247                    ALL_NEW_LINES.matcher(
248                        stringListEntry.getKey()).replaceAll("\\\\n");
249                log(firstFinding, MSG_KEY, recurringString, hits.size());
250            }
251        }
252    }
253}
254