1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package com.puppycrawl.tools.checkstyle.checks;
21
22 import java.util.Arrays;
23 import java.util.List;
24 import java.util.Map;
25 import java.util.regex.Matcher;
26 import java.util.regex.Pattern;
27
28 import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
29 import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
30 import com.puppycrawl.tools.checkstyle.api.DetailAST;
31 import com.puppycrawl.tools.checkstyle.api.TextBlock;
32 import com.puppycrawl.tools.checkstyle.api.TokenTypes;
33 import com.puppycrawl.tools.checkstyle.utils.CheckUtil;
34 import com.puppycrawl.tools.checkstyle.utils.CodePointUtil;
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89 @FileStatefulCheck
90 public class AvoidEscapedUnicodeCharactersCheck
91 extends AbstractCheck {
92
93
94
95
96
97 public static final String MSG_KEY = "forbid.escaped.unicode.char";
98
99
100 private static final Pattern UNICODE_REGEXP = Pattern.compile("\\\\u+[a-fA-F\\d]{4}");
101
102
103
104
105
106
107
108 private static final Pattern UNICODE_CONTROL = Pattern.compile("\\\\u+"
109 + "(00[0-1][\\dA-Fa-f]"
110 + "|00[8-9][\\dA-Fa-f]"
111 + "|00[aA][dD]"
112 + "|034[fF]"
113 + "|070[fF]"
114 + "|180[eE]"
115 + "|200[b-fB-F]"
116 + "|202[a-eA-E]"
117 + "|206[0-4a-fA-F]"
118 + "|[fF]{3}[9a-bA-B]"
119 + "|[fF][eE][fF]{2})");
120
121
122
123
124
125
126 private static final Pattern ALL_ESCAPED_CHARS = Pattern.compile("^("
127 + UNICODE_REGEXP.pattern()
128 + "|\""
129 + "|'"
130 + "|\\\\"
131 + "|\\\\b"
132 + "|\\\\f"
133 + "|\\\\n"
134 + "|\\R"
135 + "|\\\\r"
136 + "|\\\\s"
137 + "|\\\\t"
138 + ")+$");
139
140
141 private static final Pattern ESCAPED_BACKSLASH = Pattern.compile("\\\\\\\\");
142
143
144 private static final Pattern NON_PRINTABLE_CHARS = Pattern.compile("\\\\u0000"
145 + "|\\\\u0009"
146 + "|\\\\u000[bB]"
147 + "|\\\\u000[cC]"
148 + "|\\\\u0020"
149 + "|\\\\u007[fF]"
150 + "|\\\\u0085"
151 + "|\\\\u009[fF]"
152 + "|\\\\u00[aA]0"
153 + "|\\\\u00[aA][dD]"
154 + "|\\\\u04[fF]9"
155 + "|\\\\u05[bB][eE]"
156 + "|\\\\u05[dD]0"
157 + "|\\\\u05[eE][aA]"
158 + "|\\\\u05[fF]3"
159 + "|\\\\u05[fF]4"
160 + "|\\\\u0600"
161 + "|\\\\u0604"
162 + "|\\\\u061[cC]"
163 + "|\\\\u06[dD]{2}"
164 + "|\\\\u06[fF]{2}"
165 + "|\\\\u070[fF]"
166 + "|\\\\u0750"
167 + "|\\\\u077[fF]"
168 + "|\\\\u0[eE]00"
169 + "|\\\\u0[eE]7[fF]"
170 + "|\\\\u1680"
171 + "|\\\\u180[eE]"
172 + "|\\\\u1[eE]00"
173 + "|\\\\u2000"
174 + "|\\\\u2001"
175 + "|\\\\u2002"
176 + "|\\\\u2003"
177 + "|\\\\u2004"
178 + "|\\\\u2005"
179 + "|\\\\u2006"
180 + "|\\\\u2007"
181 + "|\\\\u2008"
182 + "|\\\\u2009"
183 + "|\\\\u200[aA]"
184 + "|\\\\u200[fF]"
185 + "|\\\\u2025"
186 + "|\\\\u2028"
187 + "|\\\\u2029"
188 + "|\\\\u202[fF]"
189 + "|\\\\u205[fF]"
190 + "|\\\\u2064"
191 + "|\\\\u2066"
192 + "|\\\\u2067"
193 + "|\\\\u2068"
194 + "|\\\\u2069"
195 + "|\\\\u206[aA]"
196 + "|\\\\u206[fF]"
197 + "|\\\\u20[aA][fF]"
198 + "|\\\\u2100"
199 + "|\\\\u213[aA]"
200 + "|\\\\u3000"
201 + "|\\\\u[dD]800"
202 + "|\\\\u[fF]8[fF]{2}"
203 + "|\\\\u[fF][bB]50"
204 + "|\\\\u[fF][dD][fF]{2}"
205 + "|\\\\u[fF][eE]70"
206 + "|\\\\u[fF][eE][fF]{2}"
207 + "|\\\\u[fF]{2}0[eE]"
208 + "|\\\\u[fF]{2}61"
209 + "|\\\\u[fF]{2}[dD][cC]"
210 + "|\\\\u[fF]{3}9"
211 + "|\\\\u[fF]{3}[aA]"
212 + "|\\\\u[fF]{3}[bB]"
213 + "|\\\\u[fF]{4}");
214
215
216 private Map<Integer, TextBlock> singlelineComments;
217
218 private Map<Integer, List<TextBlock>> blockComments;
219
220
221 private boolean allowEscapesForControlCharacters;
222
223
224 private boolean allowByTailComment;
225
226
227 private boolean allowIfAllCharactersEscaped;
228
229
230 private boolean allowNonPrintableEscapes;
231
232
233
234
235
236
237
238 public final void setAllowEscapesForControlCharacters(boolean allow) {
239 allowEscapesForControlCharacters = allow;
240 }
241
242
243
244
245
246
247
248 public final void setAllowByTailComment(boolean allow) {
249 allowByTailComment = allow;
250 }
251
252
253
254
255
256
257
258 public final void setAllowIfAllCharactersEscaped(boolean allow) {
259 allowIfAllCharactersEscaped = allow;
260 }
261
262
263
264
265
266
267
268 public final void setAllowNonPrintableEscapes(boolean allow) {
269 allowNonPrintableEscapes = allow;
270 }
271
272 @Override
273 public int[] getDefaultTokens() {
274 return getRequiredTokens();
275 }
276
277 @Override
278 public int[] getAcceptableTokens() {
279 return getRequiredTokens();
280 }
281
282 @Override
283 public int[] getRequiredTokens() {
284 return new int[] {
285 TokenTypes.STRING_LITERAL,
286 TokenTypes.CHAR_LITERAL,
287 TokenTypes.TEXT_BLOCK_CONTENT,
288 };
289 }
290
291
292 @SuppressWarnings("deprecation")
293 @Override
294 public void beginTree(DetailAST rootAST) {
295 singlelineComments = getFileContents().getSingleLineComments();
296 blockComments = getFileContents().getBlockComments();
297 }
298
299 @Override
300 public void visitToken(DetailAST ast) {
301 final String literal =
302 CheckUtil.stripIndentAndInitialNewLineFromTextBlock(ast.getText());
303
304 if (hasUnicodeChar(literal) && !(allowByTailComment && hasTrailComment(ast)
305 || isAllCharactersEscaped(literal)
306 || allowEscapesForControlCharacters
307 && isOnlyUnicodeValidChars(literal, UNICODE_CONTROL)
308 || allowNonPrintableEscapes
309 && isOnlyUnicodeValidChars(literal, NON_PRINTABLE_CHARS))) {
310 log(ast, MSG_KEY);
311 }
312 }
313
314
315
316
317
318
319
320 private static boolean hasUnicodeChar(String literal) {
321 final String literalWithoutEscapedBackslashes =
322 ESCAPED_BACKSLASH.matcher(literal).replaceAll("");
323 return UNICODE_REGEXP.matcher(literalWithoutEscapedBackslashes).find();
324 }
325
326
327
328
329
330
331
332
333 private static boolean isOnlyUnicodeValidChars(String literal, Pattern pattern) {
334 final int unicodeMatchesCounter =
335 countMatches(UNICODE_REGEXP, literal);
336 final int unicodeValidMatchesCounter =
337 countMatches(pattern, literal);
338 return unicodeMatchesCounter - unicodeValidMatchesCounter == 0;
339 }
340
341
342
343
344
345
346
347 private boolean hasTrailComment(DetailAST ast) {
348 int lineNo = ast.getLineNo();
349
350
351
352 if (ast.getType() == TokenTypes.TEXT_BLOCK_CONTENT) {
353 lineNo = ast.getNextSibling().getLineNo();
354 }
355 boolean result = false;
356 if (singlelineComments.containsKey(lineNo)) {
357 result = true;
358 }
359 else {
360 final List<TextBlock> commentList = blockComments.get(lineNo);
361 if (commentList != null) {
362 final TextBlock comment = commentList.get(commentList.size() - 1);
363 final int[] codePoints = getLineCodePoints(lineNo - 1);
364 result = isTrailingBlockComment(comment, codePoints);
365 }
366 }
367 return result;
368 }
369
370
371
372
373
374
375
376
377 private static boolean isTrailingBlockComment(TextBlock comment, int... codePoints) {
378 return comment.getText().length != 1
379 || CodePointUtil.isBlank(Arrays.copyOfRange(codePoints,
380 comment.getEndColNo() + 1, codePoints.length));
381 }
382
383
384
385
386
387
388
389
390 private static int countMatches(Pattern pattern, String target) {
391 int matcherCounter = 0;
392 final Matcher matcher = pattern.matcher(target);
393 while (matcher.find()) {
394 matcherCounter++;
395 }
396 return matcherCounter;
397 }
398
399
400
401
402
403
404
405 private boolean isAllCharactersEscaped(String literal) {
406 return allowIfAllCharactersEscaped
407 && ALL_ESCAPED_CHARS.matcher(literal).find();
408 }
409
410 }