1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package com.puppycrawl.tools.checkstyle.checks;
21
22 import java.util.Arrays;
23 import java.util.List;
24 import java.util.Map;
25 import java.util.regex.Matcher;
26 import java.util.regex.Pattern;
27
28 import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
29 import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
30 import com.puppycrawl.tools.checkstyle.api.DetailAST;
31 import com.puppycrawl.tools.checkstyle.api.TextBlock;
32 import com.puppycrawl.tools.checkstyle.api.TokenTypes;
33 import com.puppycrawl.tools.checkstyle.utils.CheckUtil;
34 import com.puppycrawl.tools.checkstyle.utils.CodePointUtil;
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86 @FileStatefulCheck
87 public class AvoidEscapedUnicodeCharactersCheck
88 extends AbstractCheck {
89
90
91
92
93
94 public static final String MSG_KEY = "forbid.escaped.unicode.char";
95
96
97 private static final Pattern UNICODE_REGEXP = Pattern.compile("\\\\u+[a-fA-F\\d]{4}");
98
99
100
101
102
103
104
105 private static final Pattern UNICODE_CONTROL = Pattern.compile("\\\\u+"
106 + "(00[0-1][\\dA-Fa-f]"
107 + "|00[8-9][\\dA-Fa-f]"
108 + "|00[aA][dD]"
109 + "|034[fF]"
110 + "|070[fF]"
111 + "|180[eE]"
112 + "|200[b-fB-F]"
113 + "|202[a-eA-E]"
114 + "|206[0-4a-fA-F]"
115 + "|[fF]{3}[9a-bA-B]"
116 + "|[fF][eE][fF]{2})");
117
118
119
120
121
122
123 private static final Pattern ALL_ESCAPED_CHARS = Pattern.compile("^("
124 + UNICODE_REGEXP.pattern()
125 + "|\""
126 + "|'"
127 + "|\\\\"
128 + "|\\\\b"
129 + "|\\\\f"
130 + "|\\\\n"
131 + "|\\R"
132 + "|\\\\r"
133 + "|\\\\s"
134 + "|\\\\t"
135 + ")+$");
136
137
138 private static final Pattern ESCAPED_BACKSLASH = Pattern.compile("\\\\\\\\");
139
140
141 private static final Pattern NON_PRINTABLE_CHARS = Pattern.compile("\\\\u0000"
142 + "|\\\\u0009"
143 + "|\\\\u000[bB]"
144 + "|\\\\u000[cC]"
145 + "|\\\\u0020"
146 + "|\\\\u007[fF]"
147 + "|\\\\u0085"
148 + "|\\\\u009[fF]"
149 + "|\\\\u00[aA]0"
150 + "|\\\\u00[aA][dD]"
151 + "|\\\\u04[fF]9"
152 + "|\\\\u05[bB][eE]"
153 + "|\\\\u05[dD]0"
154 + "|\\\\u05[eE][aA]"
155 + "|\\\\u05[fF]3"
156 + "|\\\\u05[fF]4"
157 + "|\\\\u0600"
158 + "|\\\\u0604"
159 + "|\\\\u061[cC]"
160 + "|\\\\u06[dD]{2}"
161 + "|\\\\u06[fF]{2}"
162 + "|\\\\u070[fF]"
163 + "|\\\\u0750"
164 + "|\\\\u077[fF]"
165 + "|\\\\u0[eE]00"
166 + "|\\\\u0[eE]7[fF]"
167 + "|\\\\u1680"
168 + "|\\\\u180[eE]"
169 + "|\\\\u1[eE]00"
170 + "|\\\\u2000"
171 + "|\\\\u2001"
172 + "|\\\\u2002"
173 + "|\\\\u2003"
174 + "|\\\\u2004"
175 + "|\\\\u2005"
176 + "|\\\\u2006"
177 + "|\\\\u2007"
178 + "|\\\\u2008"
179 + "|\\\\u2009"
180 + "|\\\\u200[aA]"
181 + "|\\\\u200[fF]"
182 + "|\\\\u2025"
183 + "|\\\\u2028"
184 + "|\\\\u2029"
185 + "|\\\\u202[fF]"
186 + "|\\\\u205[fF]"
187 + "|\\\\u2064"
188 + "|\\\\u2066"
189 + "|\\\\u2067"
190 + "|\\\\u2068"
191 + "|\\\\u2069"
192 + "|\\\\u206[aA]"
193 + "|\\\\u206[fF]"
194 + "|\\\\u20[aA][fF]"
195 + "|\\\\u2100"
196 + "|\\\\u213[aA]"
197 + "|\\\\u3000"
198 + "|\\\\u[dD]800"
199 + "|\\\\u[fF]8[fF]{2}"
200 + "|\\\\u[fF][bB]50"
201 + "|\\\\u[fF][dD][fF]{2}"
202 + "|\\\\u[fF][eE]70"
203 + "|\\\\u[fF][eE][fF]{2}"
204 + "|\\\\u[fF]{2}0[eE]"
205 + "|\\\\u[fF]{2}61"
206 + "|\\\\u[fF]{2}[dD][cC]"
207 + "|\\\\u[fF]{3}9"
208 + "|\\\\u[fF]{3}[aA]"
209 + "|\\\\u[fF]{3}[bB]"
210 + "|\\\\u[fF]{4}");
211
212
213 private Map<Integer, TextBlock> singlelineComments;
214
215 private Map<Integer, List<TextBlock>> blockComments;
216
217
218 private boolean allowEscapesForControlCharacters;
219
220
221 private boolean allowByTailComment;
222
223
224 private boolean allowIfAllCharactersEscaped;
225
226
227 private boolean allowNonPrintableEscapes;
228
229
230
231
232
233
234
235 public final void setAllowEscapesForControlCharacters(boolean allow) {
236 allowEscapesForControlCharacters = allow;
237 }
238
239
240
241
242
243
244
245 public final void setAllowByTailComment(boolean allow) {
246 allowByTailComment = allow;
247 }
248
249
250
251
252
253
254
255 public final void setAllowIfAllCharactersEscaped(boolean allow) {
256 allowIfAllCharactersEscaped = allow;
257 }
258
259
260
261
262
263
264
265 public final void setAllowNonPrintableEscapes(boolean allow) {
266 allowNonPrintableEscapes = allow;
267 }
268
269 @Override
270 public int[] getDefaultTokens() {
271 return getRequiredTokens();
272 }
273
274 @Override
275 public int[] getAcceptableTokens() {
276 return getRequiredTokens();
277 }
278
279 @Override
280 public int[] getRequiredTokens() {
281 return new int[] {
282 TokenTypes.STRING_LITERAL,
283 TokenTypes.CHAR_LITERAL,
284 TokenTypes.TEXT_BLOCK_CONTENT,
285 };
286 }
287
288
289 @SuppressWarnings("deprecation")
290 @Override
291 public void beginTree(DetailAST rootAST) {
292 singlelineComments = getFileContents().getSingleLineComments();
293 blockComments = getFileContents().getBlockComments();
294 }
295
296 @Override
297 public void visitToken(DetailAST ast) {
298 final String literal =
299 CheckUtil.stripIndentAndInitialNewLineFromTextBlock(ast.getText());
300
301 if (hasUnicodeChar(literal) && !(allowByTailComment && hasTrailComment(ast)
302 || isAllCharactersEscaped(literal)
303 || allowEscapesForControlCharacters
304 && isOnlyUnicodeValidChars(literal, UNICODE_CONTROL)
305 || allowNonPrintableEscapes
306 && isOnlyUnicodeValidChars(literal, NON_PRINTABLE_CHARS))) {
307 log(ast, MSG_KEY);
308 }
309 }
310
311
312
313
314
315
316
317 private static boolean hasUnicodeChar(String literal) {
318 final String literalWithoutEscapedBackslashes =
319 ESCAPED_BACKSLASH.matcher(literal).replaceAll("");
320 return UNICODE_REGEXP.matcher(literalWithoutEscapedBackslashes).find();
321 }
322
323
324
325
326
327
328
329
330 private static boolean isOnlyUnicodeValidChars(String literal, Pattern pattern) {
331 final int unicodeMatchesCounter =
332 countMatches(UNICODE_REGEXP, literal);
333 final int unicodeValidMatchesCounter =
334 countMatches(pattern, literal);
335 return unicodeMatchesCounter - unicodeValidMatchesCounter == 0;
336 }
337
338
339
340
341
342
343
344 private boolean hasTrailComment(DetailAST ast) {
345 int lineNo = ast.getLineNo();
346
347
348
349 if (ast.getType() == TokenTypes.TEXT_BLOCK_CONTENT) {
350 lineNo = ast.getNextSibling().getLineNo();
351 }
352 boolean result = false;
353 if (singlelineComments.containsKey(lineNo)) {
354 result = true;
355 }
356 else {
357 final List<TextBlock> commentList = blockComments.get(lineNo);
358 if (commentList != null) {
359 final TextBlock comment = commentList.get(commentList.size() - 1);
360 final int[] codePoints = getLineCodePoints(lineNo - 1);
361 result = isTrailingBlockComment(comment, codePoints);
362 }
363 }
364 return result;
365 }
366
367
368
369
370
371
372
373
374 private static boolean isTrailingBlockComment(TextBlock comment, int... codePoints) {
375 return comment.getText().length != 1
376 || CodePointUtil.isBlank(Arrays.copyOfRange(codePoints,
377 comment.getEndColNo() + 1, codePoints.length));
378 }
379
380
381
382
383
384
385
386
387 private static int countMatches(Pattern pattern, String target) {
388 int matcherCounter = 0;
389 final Matcher matcher = pattern.matcher(target);
390 while (matcher.find()) {
391 matcherCounter++;
392 }
393 return matcherCounter;
394 }
395
396
397
398
399
400
401
402 private boolean isAllCharactersEscaped(String literal) {
403 return allowIfAllCharactersEscaped
404 && ALL_ESCAPED_CHARS.matcher(literal).find();
405 }
406
407 }