View Javadoc
1   ///////////////////////////////////////////////////////////////////////////////////////////////
2   // checkstyle: Checks Java source code and other text files for adherence to a set of rules.
3   // Copyright (C) 2001-2024 the original author or authors.
4   //
5   // This library is free software; you can redistribute it and/or
6   // modify it under the terms of the GNU Lesser General Public
7   // License as published by the Free Software Foundation; either
8   // version 2.1 of the License, or (at your option) any later version.
9   //
10  // This library is distributed in the hope that it will be useful,
11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  // Lesser General Public License for more details.
14  //
15  // You should have received a copy of the GNU Lesser General Public
16  // License along with this library; if not, write to the Free Software
17  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  ///////////////////////////////////////////////////////////////////////////////////////////////
19  
20  package com.puppycrawl.tools.checkstyle.checks.coding;
21  
22  import java.util.ArrayList;
23  import java.util.BitSet;
24  import java.util.HashMap;
25  import java.util.List;
26  import java.util.Map;
27  import java.util.regex.Pattern;
28  
29  import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
30  import com.puppycrawl.tools.checkstyle.PropertyType;
31  import com.puppycrawl.tools.checkstyle.XdocsPropertyType;
32  import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
33  import com.puppycrawl.tools.checkstyle.api.DetailAST;
34  import com.puppycrawl.tools.checkstyle.api.TokenTypes;
35  import com.puppycrawl.tools.checkstyle.utils.CheckUtil;
36  import com.puppycrawl.tools.checkstyle.utils.TokenUtil;
37  
38  /**
39   * <p>
40   * Checks for multiple occurrences of the same string literal within a single file.
41   * </p>
42   * <p>
43   * Rationale: Code duplication makes maintenance more difficult, so it can be better
44   * to replace the multiple occurrences with a constant.
45   * </p>
46   * <ul>
47   * <li>
48   * Property {@code allowedDuplicates} - Specify the maximum number of occurrences
49   * to allow without generating a warning.
50   * Type is {@code int}.
51   * Default value is {@code 1}.
52   * </li>
53   * <li>
54   * Property {@code ignoreOccurrenceContext} - Specify token type names where duplicate
55   * strings are ignored even if they don't match ignoredStringsRegexp. This allows you to
56   * exclude syntactical contexts like annotations or static initializers from the check.
57   * Type is {@code java.lang.String[]}.
58   * Validation type is {@code tokenTypesSet}.
59   * Default value is
60   * <a href="https://checkstyle.org/apidocs/com/puppycrawl/tools/checkstyle/api/TokenTypes.html#ANNOTATION">
61   * ANNOTATION</a>.
62   * </li>
63   * <li>
64   * Property {@code ignoreStringsRegexp} - Specify RegExp for ignored strings (with quotation marks).
65   * Type is {@code java.util.regex.Pattern}.
66   * Default value is {@code "^""$"}.
67   * </li>
68   * </ul>
69   * <p>
70   * Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker}
71   * </p>
72   * <p>
73   * Violation Message Keys:
74   * </p>
75   * <ul>
76   * <li>
77   * {@code multiple.string.literal}
78   * </li>
79   * </ul>
80   *
81   * @since 3.5
82   */
83  @FileStatefulCheck
84  public class MultipleStringLiteralsCheck extends AbstractCheck {
85  
86      /**
87       * A key is pointing to the warning message text in "messages.properties"
88       * file.
89       */
90      public static final String MSG_KEY = "multiple.string.literal";
91  
92      /**
93       * Compiled pattern for all system newlines.
94       */
95      private static final Pattern ALL_NEW_LINES = Pattern.compile("\\R");
96  
97      /**
98       * String used to amend TEXT_BLOCK_CONTENT so that it matches STRING_LITERAL.
99       */
100     private static final String QUOTE = "\"";
101 
102     /**
103      * The found strings and their tokens.
104      */
105     private final Map<String, List<DetailAST>> stringMap = new HashMap<>();
106 
107     /**
108      * Specify token type names where duplicate strings are ignored even if they
109      * don't match ignoredStringsRegexp. This allows you to exclude syntactical
110      * contexts like annotations or static initializers from the check.
111      */
112     @XdocsPropertyType(PropertyType.TOKEN_ARRAY)
113     private final BitSet ignoreOccurrenceContext = new BitSet();
114 
115     /**
116      * Specify the maximum number of occurrences to allow without generating a warning.
117      */
118     private int allowedDuplicates = 1;
119 
120     /**
121      * Specify RegExp for ignored strings (with quotation marks).
122      */
123     private Pattern ignoreStringsRegexp;
124 
125     /**
126      * Construct an instance with default values.
127      */
128     public MultipleStringLiteralsCheck() {
129         setIgnoreStringsRegexp(Pattern.compile("^\"\"$"));
130         ignoreOccurrenceContext.set(TokenTypes.ANNOTATION);
131     }
132 
133     /**
134      * Setter to specify the maximum number of occurrences to allow without generating a warning.
135      *
136      * @param allowedDuplicates The maximum number of duplicates.
137      * @since 3.5
138      */
139     public void setAllowedDuplicates(int allowedDuplicates) {
140         this.allowedDuplicates = allowedDuplicates;
141     }
142 
143     /**
144      * Setter to specify RegExp for ignored strings (with quotation marks).
145      *
146      * @param ignoreStringsRegexp
147      *        regular expression pattern for ignored strings
148      * @noinspection WeakerAccess
149      * @noinspectionreason WeakerAccess - we avoid 'protected' when possible
150      * @since 4.0
151      */
152     public final void setIgnoreStringsRegexp(Pattern ignoreStringsRegexp) {
153         if (ignoreStringsRegexp == null || ignoreStringsRegexp.pattern().isEmpty()) {
154             this.ignoreStringsRegexp = null;
155         }
156         else {
157             this.ignoreStringsRegexp = ignoreStringsRegexp;
158         }
159     }
160 
161     /**
162      * Setter to specify token type names where duplicate strings are ignored even
163      * if they don't match ignoredStringsRegexp. This allows you to exclude
164      * syntactical contexts like annotations or static initializers from the check.
165      *
166      * @param strRep the string representation of the tokens interested in
167      * @since 4.4
168      */
169     public final void setIgnoreOccurrenceContext(String... strRep) {
170         ignoreOccurrenceContext.clear();
171         for (final String s : strRep) {
172             final int type = TokenUtil.getTokenId(s);
173             ignoreOccurrenceContext.set(type);
174         }
175     }
176 
177     @Override
178     public int[] getDefaultTokens() {
179         return getRequiredTokens();
180     }
181 
182     @Override
183     public int[] getAcceptableTokens() {
184         return getRequiredTokens();
185     }
186 
187     @Override
188     public int[] getRequiredTokens() {
189         return new int[] {
190             TokenTypes.STRING_LITERAL,
191             TokenTypes.TEXT_BLOCK_CONTENT,
192         };
193     }
194 
195     @Override
196     public void visitToken(DetailAST ast) {
197         if (!isInIgnoreOccurrenceContext(ast)) {
198             final String currentString;
199             if (ast.getType() == TokenTypes.TEXT_BLOCK_CONTENT) {
200                 final String strippedString =
201                     CheckUtil.stripIndentAndInitialNewLineFromTextBlock(ast.getText());
202                 // We need to add quotes here to be consistent with STRING_LITERAL text.
203                 currentString = QUOTE + strippedString + QUOTE;
204             }
205             else {
206                 currentString = ast.getText();
207             }
208             if (ignoreStringsRegexp == null
209                     || !ignoreStringsRegexp.matcher(currentString).find()) {
210                 stringMap.computeIfAbsent(currentString, key -> new ArrayList<>()).add(ast);
211             }
212         }
213     }
214 
215     /**
216      * Analyses the path from the AST root to a given AST for occurrences
217      * of the token types in {@link #ignoreOccurrenceContext}.
218      *
219      * @param ast the node from where to start searching towards the root node
220      * @return whether the path from the root node to ast contains one of the
221      *     token type in {@link #ignoreOccurrenceContext}.
222      */
223     private boolean isInIgnoreOccurrenceContext(DetailAST ast) {
224         boolean isInIgnoreOccurrenceContext = false;
225         for (DetailAST token = ast; token != null; token = token.getParent()) {
226             final int type = token.getType();
227             if (ignoreOccurrenceContext.get(type)) {
228                 isInIgnoreOccurrenceContext = true;
229                 break;
230             }
231         }
232         return isInIgnoreOccurrenceContext;
233     }
234 
235     @Override
236     public void beginTree(DetailAST rootAST) {
237         stringMap.clear();
238     }
239 
240     @Override
241     public void finishTree(DetailAST rootAST) {
242         for (Map.Entry<String, List<DetailAST>> stringListEntry : stringMap.entrySet()) {
243             final List<DetailAST> hits = stringListEntry.getValue();
244             if (hits.size() > allowedDuplicates) {
245                 final DetailAST firstFinding = hits.get(0);
246                 final String recurringString =
247                     ALL_NEW_LINES.matcher(
248                         stringListEntry.getKey()).replaceAll("\\\\n");
249                 log(firstFinding, MSG_KEY, recurringString, hits.size());
250             }
251         }
252     }
253 }
254