1 ///////////////////////////////////////////////////////////////////////////////////////////////
2 // checkstyle: Checks Java source code and other text files for adherence to a set of rules.
3 // Copyright (C) 2001-2025 the original author or authors.
4 //
5 // This library is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU Lesser General Public
7 // License as published by the Free Software Foundation; either
8 // version 2.1 of the License, or (at your option) any later version.
9 //
10 // This library is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public
16 // License along with this library; if not, write to the Free Software
17 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 ///////////////////////////////////////////////////////////////////////////////////////////////
19
20 package com.puppycrawl.tools.checkstyle.checks.coding;
21
22 import java.util.ArrayList;
23 import java.util.BitSet;
24 import java.util.HashMap;
25 import java.util.List;
26 import java.util.Map;
27 import java.util.regex.Pattern;
28
29 import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
30 import com.puppycrawl.tools.checkstyle.PropertyType;
31 import com.puppycrawl.tools.checkstyle.XdocsPropertyType;
32 import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
33 import com.puppycrawl.tools.checkstyle.api.DetailAST;
34 import com.puppycrawl.tools.checkstyle.api.TokenTypes;
35 import com.puppycrawl.tools.checkstyle.utils.CheckUtil;
36 import com.puppycrawl.tools.checkstyle.utils.TokenUtil;
37
38 /**
39 * <div>
40 * Checks for multiple occurrences of the same string literal within a single file.
41 * </div>
42 *
43 * <p>
44 * Rationale: Code duplication makes maintenance more difficult, so it can be better
45 * to replace the multiple occurrences with a constant.
46 * </p>
47 *
48 * @since 3.5
49 */
50 @FileStatefulCheck
51 public class MultipleStringLiteralsCheck extends AbstractCheck {
52
53 /**
54 * A key is pointing to the warning message text in "messages.properties"
55 * file.
56 */
57 public static final String MSG_KEY = "multiple.string.literal";
58
59 /**
60 * Compiled pattern for all system newlines.
61 */
62 private static final Pattern ALL_NEW_LINES = Pattern.compile("\\R");
63
64 /**
65 * String used to amend TEXT_BLOCK_CONTENT so that it matches STRING_LITERAL.
66 */
67 private static final String QUOTE = "\"";
68
69 /**
70 * The found strings and their tokens.
71 */
72 private final Map<String, List<DetailAST>> stringMap = new HashMap<>();
73
74 /**
75 * Specify token type names where duplicate strings are ignored even if they
76 * don't match ignoredStringsRegexp. This allows you to exclude syntactical
77 * contexts like annotations or static initializers from the check.
78 */
79 @XdocsPropertyType(PropertyType.TOKEN_ARRAY)
80 private final BitSet ignoreOccurrenceContext = new BitSet();
81
82 /**
83 * Specify the maximum number of occurrences to allow without generating a warning.
84 */
85 private int allowedDuplicates = 1;
86
87 /**
88 * Specify RegExp for ignored strings (with quotation marks).
89 */
90 private Pattern ignoreStringsRegexp;
91
92 /**
93 * Construct an instance with default values.
94 */
95 public MultipleStringLiteralsCheck() {
96 setIgnoreStringsRegexp(Pattern.compile("^\"\"$"));
97 ignoreOccurrenceContext.set(TokenTypes.ANNOTATION);
98 }
99
100 /**
101 * Setter to specify the maximum number of occurrences to allow without generating a warning.
102 *
103 * @param allowedDuplicates The maximum number of duplicates.
104 * @since 3.5
105 */
106 public void setAllowedDuplicates(int allowedDuplicates) {
107 this.allowedDuplicates = allowedDuplicates;
108 }
109
110 /**
111 * Setter to specify RegExp for ignored strings (with quotation marks).
112 *
113 * @param ignoreStringsRegexp
114 * regular expression pattern for ignored strings
115 * @noinspection WeakerAccess
116 * @noinspectionreason WeakerAccess - we avoid 'protected' when possible
117 * @since 4.0
118 */
119 public final void setIgnoreStringsRegexp(Pattern ignoreStringsRegexp) {
120 if (ignoreStringsRegexp == null || ignoreStringsRegexp.pattern().isEmpty()) {
121 this.ignoreStringsRegexp = null;
122 }
123 else {
124 this.ignoreStringsRegexp = ignoreStringsRegexp;
125 }
126 }
127
128 /**
129 * Setter to specify token type names where duplicate strings are ignored even
130 * if they don't match ignoredStringsRegexp. This allows you to exclude
131 * syntactical contexts like annotations or static initializers from the check.
132 *
133 * @param strRep the string representation of the tokens interested in
134 * @since 4.4
135 */
136 public final void setIgnoreOccurrenceContext(String... strRep) {
137 ignoreOccurrenceContext.clear();
138 for (final String s : strRep) {
139 final int type = TokenUtil.getTokenId(s);
140 ignoreOccurrenceContext.set(type);
141 }
142 }
143
144 @Override
145 public int[] getDefaultTokens() {
146 return getRequiredTokens();
147 }
148
149 @Override
150 public int[] getAcceptableTokens() {
151 return getRequiredTokens();
152 }
153
154 @Override
155 public int[] getRequiredTokens() {
156 return new int[] {
157 TokenTypes.STRING_LITERAL,
158 TokenTypes.TEXT_BLOCK_CONTENT,
159 };
160 }
161
162 @Override
163 public void visitToken(DetailAST ast) {
164 if (!isInIgnoreOccurrenceContext(ast)) {
165 final String currentString;
166 if (ast.getType() == TokenTypes.TEXT_BLOCK_CONTENT) {
167 final String strippedString =
168 CheckUtil.stripIndentAndInitialNewLineFromTextBlock(ast.getText());
169 // We need to add quotes here to be consistent with STRING_LITERAL text.
170 currentString = QUOTE + strippedString + QUOTE;
171 }
172 else {
173 currentString = ast.getText();
174 }
175 if (ignoreStringsRegexp == null
176 || !ignoreStringsRegexp.matcher(currentString).find()) {
177 stringMap.computeIfAbsent(currentString, key -> new ArrayList<>()).add(ast);
178 }
179 }
180 }
181
182 /**
183 * Analyses the path from the AST root to a given AST for occurrences
184 * of the token types in {@link #ignoreOccurrenceContext}.
185 *
186 * @param ast the node from where to start searching towards the root node
187 * @return whether the path from the root node to ast contains one of the
188 * token type in {@link #ignoreOccurrenceContext}.
189 */
190 private boolean isInIgnoreOccurrenceContext(DetailAST ast) {
191 boolean isInIgnoreOccurrenceContext = false;
192 for (DetailAST token = ast; token != null; token = token.getParent()) {
193 final int type = token.getType();
194 if (ignoreOccurrenceContext.get(type)) {
195 isInIgnoreOccurrenceContext = true;
196 break;
197 }
198 }
199 return isInIgnoreOccurrenceContext;
200 }
201
202 @Override
203 public void beginTree(DetailAST rootAST) {
204 stringMap.clear();
205 }
206
207 @Override
208 public void finishTree(DetailAST rootAST) {
209 for (Map.Entry<String, List<DetailAST>> stringListEntry : stringMap.entrySet()) {
210 final List<DetailAST> hits = stringListEntry.getValue();
211 if (hits.size() > allowedDuplicates) {
212 final DetailAST firstFinding = hits.get(0);
213 final String recurringString =
214 ALL_NEW_LINES.matcher(
215 stringListEntry.getKey()).replaceAll("\\\\n");
216 log(firstFinding, MSG_KEY, recurringString, hits.size());
217 }
218 }
219 }
220 }