View Javadoc
1   ///////////////////////////////////////////////////////////////////////////////////////////////
2   // checkstyle: Checks Java source code and other text files for adherence to a set of rules.
3   // Copyright (C) 2001-2024 the original author or authors.
4   //
5   // This library is free software; you can redistribute it and/or
6   // modify it under the terms of the GNU Lesser General Public
7   // License as published by the Free Software Foundation; either
8   // version 2.1 of the License, or (at your option) any later version.
9   //
10  // This library is distributed in the hope that it will be useful,
11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  // Lesser General Public License for more details.
14  //
15  // You should have received a copy of the GNU Lesser General Public
16  // License along with this library; if not, write to the Free Software
17  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  ///////////////////////////////////////////////////////////////////////////////////////////////
19  
20  package com.puppycrawl.tools.checkstyle.checks.coding;
21  
22  import java.util.ArrayList;
23  import java.util.BitSet;
24  import java.util.HashMap;
25  import java.util.List;
26  import java.util.Map;
27  import java.util.regex.Pattern;
28  
29  import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
30  import com.puppycrawl.tools.checkstyle.PropertyType;
31  import com.puppycrawl.tools.checkstyle.XdocsPropertyType;
32  import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
33  import com.puppycrawl.tools.checkstyle.api.DetailAST;
34  import com.puppycrawl.tools.checkstyle.api.TokenTypes;
35  import com.puppycrawl.tools.checkstyle.utils.CheckUtil;
36  import com.puppycrawl.tools.checkstyle.utils.TokenUtil;
37  
38  /**
39   * <div>
40   * Checks for multiple occurrences of the same string literal within a single file.
41   * </div>
42   *
43   * <p>
44   * Rationale: Code duplication makes maintenance more difficult, so it can be better
45   * to replace the multiple occurrences with a constant.
46   * </p>
47   * <ul>
48   * <li>
49   * Property {@code allowedDuplicates} - Specify the maximum number of occurrences
50   * to allow without generating a warning.
51   * Type is {@code int}.
52   * Default value is {@code 1}.
53   * </li>
54   * <li>
55   * Property {@code ignoreOccurrenceContext} - Specify token type names where duplicate
56   * strings are ignored even if they don't match ignoredStringsRegexp. This allows you to
57   * exclude syntactical contexts like annotations or static initializers from the check.
58   * Type is {@code java.lang.String[]}.
59   * Validation type is {@code tokenTypesSet}.
60   * Default value is
61   * <a href="https://checkstyle.org/apidocs/com/puppycrawl/tools/checkstyle/api/TokenTypes.html#ANNOTATION">
62   * ANNOTATION</a>.
63   * </li>
64   * <li>
65   * Property {@code ignoreStringsRegexp} - Specify RegExp for ignored strings (with quotation marks).
66   * Type is {@code java.util.regex.Pattern}.
67   * Default value is {@code "^""$"}.
68   * </li>
69   * </ul>
70   *
71   * <p>
72   * Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker}
73   * </p>
74   *
75   * <p>
76   * Violation Message Keys:
77   * </p>
78   * <ul>
79   * <li>
80   * {@code multiple.string.literal}
81   * </li>
82   * </ul>
83   *
84   * @since 3.5
85   */
86  @FileStatefulCheck
87  public class MultipleStringLiteralsCheck extends AbstractCheck {
88  
89      /**
90       * A key is pointing to the warning message text in "messages.properties"
91       * file.
92       */
93      public static final String MSG_KEY = "multiple.string.literal";
94  
95      /**
96       * Compiled pattern for all system newlines.
97       */
98      private static final Pattern ALL_NEW_LINES = Pattern.compile("\\R");
99  
100     /**
101      * String used to amend TEXT_BLOCK_CONTENT so that it matches STRING_LITERAL.
102      */
103     private static final String QUOTE = "\"";
104 
105     /**
106      * The found strings and their tokens.
107      */
108     private final Map<String, List<DetailAST>> stringMap = new HashMap<>();
109 
110     /**
111      * Specify token type names where duplicate strings are ignored even if they
112      * don't match ignoredStringsRegexp. This allows you to exclude syntactical
113      * contexts like annotations or static initializers from the check.
114      */
115     @XdocsPropertyType(PropertyType.TOKEN_ARRAY)
116     private final BitSet ignoreOccurrenceContext = new BitSet();
117 
118     /**
119      * Specify the maximum number of occurrences to allow without generating a warning.
120      */
121     private int allowedDuplicates = 1;
122 
123     /**
124      * Specify RegExp for ignored strings (with quotation marks).
125      */
126     private Pattern ignoreStringsRegexp;
127 
128     /**
129      * Construct an instance with default values.
130      */
131     public MultipleStringLiteralsCheck() {
132         setIgnoreStringsRegexp(Pattern.compile("^\"\"$"));
133         ignoreOccurrenceContext.set(TokenTypes.ANNOTATION);
134     }
135 
136     /**
137      * Setter to specify the maximum number of occurrences to allow without generating a warning.
138      *
139      * @param allowedDuplicates The maximum number of duplicates.
140      * @since 3.5
141      */
142     public void setAllowedDuplicates(int allowedDuplicates) {
143         this.allowedDuplicates = allowedDuplicates;
144     }
145 
146     /**
147      * Setter to specify RegExp for ignored strings (with quotation marks).
148      *
149      * @param ignoreStringsRegexp
150      *        regular expression pattern for ignored strings
151      * @noinspection WeakerAccess
152      * @noinspectionreason WeakerAccess - we avoid 'protected' when possible
153      * @since 4.0
154      */
155     public final void setIgnoreStringsRegexp(Pattern ignoreStringsRegexp) {
156         if (ignoreStringsRegexp == null || ignoreStringsRegexp.pattern().isEmpty()) {
157             this.ignoreStringsRegexp = null;
158         }
159         else {
160             this.ignoreStringsRegexp = ignoreStringsRegexp;
161         }
162     }
163 
164     /**
165      * Setter to specify token type names where duplicate strings are ignored even
166      * if they don't match ignoredStringsRegexp. This allows you to exclude
167      * syntactical contexts like annotations or static initializers from the check.
168      *
169      * @param strRep the string representation of the tokens interested in
170      * @since 4.4
171      */
172     public final void setIgnoreOccurrenceContext(String... strRep) {
173         ignoreOccurrenceContext.clear();
174         for (final String s : strRep) {
175             final int type = TokenUtil.getTokenId(s);
176             ignoreOccurrenceContext.set(type);
177         }
178     }
179 
180     @Override
181     public int[] getDefaultTokens() {
182         return getRequiredTokens();
183     }
184 
185     @Override
186     public int[] getAcceptableTokens() {
187         return getRequiredTokens();
188     }
189 
190     @Override
191     public int[] getRequiredTokens() {
192         return new int[] {
193             TokenTypes.STRING_LITERAL,
194             TokenTypes.TEXT_BLOCK_CONTENT,
195         };
196     }
197 
198     @Override
199     public void visitToken(DetailAST ast) {
200         if (!isInIgnoreOccurrenceContext(ast)) {
201             final String currentString;
202             if (ast.getType() == TokenTypes.TEXT_BLOCK_CONTENT) {
203                 final String strippedString =
204                     CheckUtil.stripIndentAndInitialNewLineFromTextBlock(ast.getText());
205                 // We need to add quotes here to be consistent with STRING_LITERAL text.
206                 currentString = QUOTE + strippedString + QUOTE;
207             }
208             else {
209                 currentString = ast.getText();
210             }
211             if (ignoreStringsRegexp == null
212                     || !ignoreStringsRegexp.matcher(currentString).find()) {
213                 stringMap.computeIfAbsent(currentString, key -> new ArrayList<>()).add(ast);
214             }
215         }
216     }
217 
218     /**
219      * Analyses the path from the AST root to a given AST for occurrences
220      * of the token types in {@link #ignoreOccurrenceContext}.
221      *
222      * @param ast the node from where to start searching towards the root node
223      * @return whether the path from the root node to ast contains one of the
224      *     token type in {@link #ignoreOccurrenceContext}.
225      */
226     private boolean isInIgnoreOccurrenceContext(DetailAST ast) {
227         boolean isInIgnoreOccurrenceContext = false;
228         for (DetailAST token = ast; token != null; token = token.getParent()) {
229             final int type = token.getType();
230             if (ignoreOccurrenceContext.get(type)) {
231                 isInIgnoreOccurrenceContext = true;
232                 break;
233             }
234         }
235         return isInIgnoreOccurrenceContext;
236     }
237 
238     @Override
239     public void beginTree(DetailAST rootAST) {
240         stringMap.clear();
241     }
242 
243     @Override
244     public void finishTree(DetailAST rootAST) {
245         for (Map.Entry<String, List<DetailAST>> stringListEntry : stringMap.entrySet()) {
246             final List<DetailAST> hits = stringListEntry.getValue();
247             if (hits.size() > allowedDuplicates) {
248                 final DetailAST firstFinding = hits.get(0);
249                 final String recurringString =
250                     ALL_NEW_LINES.matcher(
251                         stringListEntry.getKey()).replaceAll("\\\\n");
252                 log(firstFinding, MSG_KEY, recurringString, hits.size());
253             }
254         }
255     }
256 }
257