1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package com.puppycrawl.tools.checkstyle.checks;
21
22 import java.util.ArrayList;
23 import java.util.HashMap;
24 import java.util.List;
25 import java.util.Map;
26 import java.util.regex.Matcher;
27 import java.util.regex.Pattern;
28
29 import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
30 import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
31 import com.puppycrawl.tools.checkstyle.api.DetailAST;
32 import com.puppycrawl.tools.checkstyle.api.TokenTypes;
33 import com.puppycrawl.tools.checkstyle.utils.CheckUtil;
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50 @FileStatefulCheck
51 public class AvoidEscapedUnicodeCharactersCheck
52 extends AbstractCheck {
53
54
55
56
57
58 public static final String MSG_KEY = "forbid.escaped.unicode.char";
59
60
61 private static final Pattern UNICODE_REGEXP = Pattern.compile("\\\\u+[a-fA-F\\d]{4}");
62
63
64
65
66
67
68
69 private static final Pattern UNICODE_CONTROL = Pattern.compile("\\\\u+"
70 + "(00[0-1][\\dA-Fa-f]"
71 + "|00[8-9][\\dA-Fa-f]"
72 + "|00[aA][dD]"
73 + "|034[fF]"
74 + "|070[fF]"
75 + "|180[eE]"
76 + "|200[b-fB-F]"
77 + "|202[a-eA-E]"
78 + "|206[0-4a-fA-F]"
79 + "|[fF]{3}[9a-bA-B]"
80 + "|[fF][eE][fF]{2})");
81
82
83
84
85
86
87 private static final Pattern ALL_ESCAPED_CHARS = Pattern.compile("^("
88 + UNICODE_REGEXP.pattern()
89 + "|\""
90 + "|'"
91 + "|\\\\"
92 + "|\\\\b"
93 + "|\\\\f"
94 + "|\\\\n"
95 + "|\\R"
96 + "|\\\\r"
97 + "|\\\\s"
98 + "|\\\\t"
99 + ")+$");
100
101
102 private static final Pattern ESCAPED_BACKSLASH = Pattern.compile("\\\\\\\\");
103
104
105 private static final Pattern NON_PRINTABLE_CHARS = Pattern.compile("\\\\u0000"
106 + "|\\\\u0009"
107 + "|\\\\u000[bB]"
108 + "|\\\\u000[cC]"
109 + "|\\\\u0020"
110 + "|\\\\u007[fF]"
111 + "|\\\\u0085"
112 + "|\\\\u009[fF]"
113 + "|\\\\u00[aA]0"
114 + "|\\\\u00[aA][dD]"
115 + "|\\\\u04[fF]9"
116 + "|\\\\u05[bB][eE]"
117 + "|\\\\u05[dD]0"
118 + "|\\\\u05[eE][aA]"
119 + "|\\\\u05[fF]3"
120 + "|\\\\u05[fF]4"
121 + "|\\\\u0600"
122 + "|\\\\u0604"
123 + "|\\\\u061[cC]"
124 + "|\\\\u06[dD]{2}"
125 + "|\\\\u06[fF]{2}"
126 + "|\\\\u070[fF]"
127 + "|\\\\u0750"
128 + "|\\\\u077[fF]"
129 + "|\\\\u0[eE]00"
130 + "|\\\\u0[eE]7[fF]"
131 + "|\\\\u1680"
132 + "|\\\\u180[eE]"
133 + "|\\\\u1[eE]00"
134 + "|\\\\u2000"
135 + "|\\\\u2001"
136 + "|\\\\u2002"
137 + "|\\\\u2003"
138 + "|\\\\u2004"
139 + "|\\\\u2005"
140 + "|\\\\u2006"
141 + "|\\\\u2007"
142 + "|\\\\u2008"
143 + "|\\\\u2009"
144 + "|\\\\u200[aA]"
145 + "|\\\\u200[fF]"
146 + "|\\\\u2025"
147 + "|\\\\u2028"
148 + "|\\\\u2029"
149 + "|\\\\u202[fF]"
150 + "|\\\\u205[fF]"
151 + "|\\\\u2064"
152 + "|\\\\u2066"
153 + "|\\\\u2067"
154 + "|\\\\u2068"
155 + "|\\\\u2069"
156 + "|\\\\u206[aA]"
157 + "|\\\\u206[fF]"
158 + "|\\\\u20[aA][fF]"
159 + "|\\\\u2100"
160 + "|\\\\u213[aA]"
161 + "|\\\\u3000"
162 + "|\\\\u[dD]800"
163 + "|\\\\u[fF]8[fF]{2}"
164 + "|\\\\u[fF][bB]50"
165 + "|\\\\u[fF][dD][fF]{2}"
166 + "|\\\\u[fF][eE]70"
167 + "|\\\\u[fF][eE][fF]{2}"
168 + "|\\\\u[fF]{2}0[eE]"
169 + "|\\\\u[fF]{2}61"
170 + "|\\\\u[fF]{2}[dD][cC]"
171 + "|\\\\u[fF]{3}9"
172 + "|\\\\u[fF]{3}[aA]"
173 + "|\\\\u[fF]{3}[bB]"
174 + "|\\\\u[fF]{4}");
175
176
177
178
179
180
181 private final Map<Integer, List<DetailAST>> pendingViolations = new HashMap<>();
182
183
184 private boolean allowEscapesForControlCharacters;
185
186
187 private boolean allowByTailComment;
188
189
190 private boolean allowIfAllCharactersEscaped;
191
192
193 private boolean allowNonPrintableEscapes;
194
195
196
197
198
199
200
201 public final void setAllowEscapesForControlCharacters(boolean allow) {
202 allowEscapesForControlCharacters = allow;
203 }
204
205
206
207
208
209
210
211 public final void setAllowByTailComment(boolean allow) {
212 allowByTailComment = allow;
213 }
214
215
216
217
218
219
220
221 public final void setAllowIfAllCharactersEscaped(boolean allow) {
222 allowIfAllCharactersEscaped = allow;
223 }
224
225
226
227
228
229
230
231 public final void setAllowNonPrintableEscapes(boolean allow) {
232 allowNonPrintableEscapes = allow;
233 }
234
235 @Override
236 public int[] getDefaultTokens() {
237 return getRequiredTokens();
238 }
239
240 @Override
241 public int[] getAcceptableTokens() {
242 return getRequiredTokens();
243 }
244
245 @Override
246 public int[] getRequiredTokens() {
247 return new int[] {
248 TokenTypes.STRING_LITERAL,
249 TokenTypes.CHAR_LITERAL,
250 TokenTypes.TEXT_BLOCK_CONTENT,
251 TokenTypes.SINGLE_LINE_COMMENT,
252 TokenTypes.BLOCK_COMMENT_BEGIN,
253 };
254 }
255
256 @Override
257 public boolean isCommentNodesRequired() {
258 return true;
259 }
260
261 @Override
262 public void beginTree(DetailAST rootAST) {
263 pendingViolations.clear();
264 }
265
266 @Override
267 public void visitToken(DetailAST ast) {
268 if (ast.getType() == TokenTypes.SINGLE_LINE_COMMENT
269 || ast.getType() == TokenTypes.BLOCK_COMMENT_BEGIN) {
270 checkComment(ast);
271 }
272 else {
273 checkLiteral(ast);
274 }
275 }
276
277 @Override
278 public void finishTree(DetailAST rootAST) {
279 for (List<DetailAST> asts : pendingViolations.values()) {
280 for (DetailAST ast : asts) {
281 log(ast, MSG_KEY);
282 }
283 }
284 }
285
286
287
288
289
290
291
292 private void checkLiteral(DetailAST ast) {
293 final String literal =
294 CheckUtil.stripIndentAndInitialNewLineFromTextBlock(ast.getText());
295
296 if (hasUnicodeChar(literal) && !(isAllCharactersEscaped(literal)
297 || allowEscapesForControlCharacters
298 && isOnlyUnicodeValidChars(literal, UNICODE_CONTROL)
299 || allowNonPrintableEscapes
300 && isOnlyUnicodeValidChars(literal, NON_PRINTABLE_CHARS))) {
301
302 if (allowByTailComment) {
303 int lineNo = ast.getLineNo();
304 if (ast.getType() == TokenTypes.TEXT_BLOCK_CONTENT) {
305 lineNo = ast.getNextSibling().getLineNo();
306 }
307 pendingViolations.computeIfAbsent(lineNo, key -> new ArrayList<>()).add(ast);
308 }
309 else {
310 log(ast, MSG_KEY);
311 }
312 }
313 }
314
315
316
317
318
319
320 private void checkComment(DetailAST comment) {
321 if (isTrailingComment(comment)) {
322 pendingViolations.remove(comment.getLineNo());
323 }
324 }
325
326
327
328
329
330
331
332 private static boolean isTrailingComment(DetailAST commentNode) {
333 final DetailAST nextSibling = commentNode.getNextSibling();
334 return nextSibling == null || nextSibling.getLineNo() != commentNode.getLineNo();
335 }
336
337
338
339
340
341
342
343 private static boolean hasUnicodeChar(String literal) {
344 final String literalWithoutEscapedBackslashes =
345 ESCAPED_BACKSLASH.matcher(literal).replaceAll("");
346 return UNICODE_REGEXP.matcher(literalWithoutEscapedBackslashes).find();
347 }
348
349
350
351
352
353
354
355
356 private static boolean isOnlyUnicodeValidChars(String literal, Pattern pattern) {
357 final int unicodeMatchesCounter =
358 countMatches(UNICODE_REGEXP, literal);
359 final int unicodeValidMatchesCounter =
360 countMatches(pattern, literal);
361 return unicodeMatchesCounter - unicodeValidMatchesCounter == 0;
362 }
363
364
365
366
367
368
369
370
371 private static int countMatches(Pattern pattern, String target) {
372 int matcherCounter = 0;
373 final Matcher matcher = pattern.matcher(target);
374 while (matcher.find()) {
375 matcherCounter++;
376 }
377 return matcherCounter;
378 }
379
380
381
382
383
384
385
386 private boolean isAllCharactersEscaped(String literal) {
387 return allowIfAllCharactersEscaped
388 && ALL_ESCAPED_CHARS.matcher(literal).find();
389 }
390 }