1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package com.puppycrawl.tools.checkstyle.checks;
21
22 import java.util.Arrays;
23 import java.util.List;
24 import java.util.Map;
25 import java.util.regex.Matcher;
26 import java.util.regex.Pattern;
27
28 import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
29 import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
30 import com.puppycrawl.tools.checkstyle.api.DetailAST;
31 import com.puppycrawl.tools.checkstyle.api.TextBlock;
32 import com.puppycrawl.tools.checkstyle.api.TokenTypes;
33 import com.puppycrawl.tools.checkstyle.utils.CheckUtil;
34 import com.puppycrawl.tools.checkstyle.utils.CodePointUtil;
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51 @FileStatefulCheck
52 public class AvoidEscapedUnicodeCharactersCheck
53 extends AbstractCheck {
54
55
56
57
58
59 public static final String MSG_KEY = "forbid.escaped.unicode.char";
60
61
62 private static final Pattern UNICODE_REGEXP = Pattern.compile("\\\\u+[a-fA-F\\d]{4}");
63
64
65
66
67
68
69
70 private static final Pattern UNICODE_CONTROL = Pattern.compile("\\\\u+"
71 + "(00[0-1][\\dA-Fa-f]"
72 + "|00[8-9][\\dA-Fa-f]"
73 + "|00[aA][dD]"
74 + "|034[fF]"
75 + "|070[fF]"
76 + "|180[eE]"
77 + "|200[b-fB-F]"
78 + "|202[a-eA-E]"
79 + "|206[0-4a-fA-F]"
80 + "|[fF]{3}[9a-bA-B]"
81 + "|[fF][eE][fF]{2})");
82
83
84
85
86
87
88 private static final Pattern ALL_ESCAPED_CHARS = Pattern.compile("^("
89 + UNICODE_REGEXP.pattern()
90 + "|\""
91 + "|'"
92 + "|\\\\"
93 + "|\\\\b"
94 + "|\\\\f"
95 + "|\\\\n"
96 + "|\\R"
97 + "|\\\\r"
98 + "|\\\\s"
99 + "|\\\\t"
100 + ")+$");
101
102
103 private static final Pattern ESCAPED_BACKSLASH = Pattern.compile("\\\\\\\\");
104
105
106 private static final Pattern NON_PRINTABLE_CHARS = Pattern.compile("\\\\u0000"
107 + "|\\\\u0009"
108 + "|\\\\u000[bB]"
109 + "|\\\\u000[cC]"
110 + "|\\\\u0020"
111 + "|\\\\u007[fF]"
112 + "|\\\\u0085"
113 + "|\\\\u009[fF]"
114 + "|\\\\u00[aA]0"
115 + "|\\\\u00[aA][dD]"
116 + "|\\\\u04[fF]9"
117 + "|\\\\u05[bB][eE]"
118 + "|\\\\u05[dD]0"
119 + "|\\\\u05[eE][aA]"
120 + "|\\\\u05[fF]3"
121 + "|\\\\u05[fF]4"
122 + "|\\\\u0600"
123 + "|\\\\u0604"
124 + "|\\\\u061[cC]"
125 + "|\\\\u06[dD]{2}"
126 + "|\\\\u06[fF]{2}"
127 + "|\\\\u070[fF]"
128 + "|\\\\u0750"
129 + "|\\\\u077[fF]"
130 + "|\\\\u0[eE]00"
131 + "|\\\\u0[eE]7[fF]"
132 + "|\\\\u1680"
133 + "|\\\\u180[eE]"
134 + "|\\\\u1[eE]00"
135 + "|\\\\u2000"
136 + "|\\\\u2001"
137 + "|\\\\u2002"
138 + "|\\\\u2003"
139 + "|\\\\u2004"
140 + "|\\\\u2005"
141 + "|\\\\u2006"
142 + "|\\\\u2007"
143 + "|\\\\u2008"
144 + "|\\\\u2009"
145 + "|\\\\u200[aA]"
146 + "|\\\\u200[fF]"
147 + "|\\\\u2025"
148 + "|\\\\u2028"
149 + "|\\\\u2029"
150 + "|\\\\u202[fF]"
151 + "|\\\\u205[fF]"
152 + "|\\\\u2064"
153 + "|\\\\u2066"
154 + "|\\\\u2067"
155 + "|\\\\u2068"
156 + "|\\\\u2069"
157 + "|\\\\u206[aA]"
158 + "|\\\\u206[fF]"
159 + "|\\\\u20[aA][fF]"
160 + "|\\\\u2100"
161 + "|\\\\u213[aA]"
162 + "|\\\\u3000"
163 + "|\\\\u[dD]800"
164 + "|\\\\u[fF]8[fF]{2}"
165 + "|\\\\u[fF][bB]50"
166 + "|\\\\u[fF][dD][fF]{2}"
167 + "|\\\\u[fF][eE]70"
168 + "|\\\\u[fF][eE][fF]{2}"
169 + "|\\\\u[fF]{2}0[eE]"
170 + "|\\\\u[fF]{2}61"
171 + "|\\\\u[fF]{2}[dD][cC]"
172 + "|\\\\u[fF]{3}9"
173 + "|\\\\u[fF]{3}[aA]"
174 + "|\\\\u[fF]{3}[bB]"
175 + "|\\\\u[fF]{4}");
176
177
178 private Map<Integer, TextBlock> singlelineComments;
179
180 private Map<Integer, List<TextBlock>> blockComments;
181
182
183 private boolean allowEscapesForControlCharacters;
184
185
186 private boolean allowByTailComment;
187
188
189 private boolean allowIfAllCharactersEscaped;
190
191
192 private boolean allowNonPrintableEscapes;
193
194
195
196
197
198
199
200 public final void setAllowEscapesForControlCharacters(boolean allow) {
201 allowEscapesForControlCharacters = allow;
202 }
203
204
205
206
207
208
209
210 public final void setAllowByTailComment(boolean allow) {
211 allowByTailComment = allow;
212 }
213
214
215
216
217
218
219
220 public final void setAllowIfAllCharactersEscaped(boolean allow) {
221 allowIfAllCharactersEscaped = allow;
222 }
223
224
225
226
227
228
229
230 public final void setAllowNonPrintableEscapes(boolean allow) {
231 allowNonPrintableEscapes = allow;
232 }
233
234 @Override
235 public int[] getDefaultTokens() {
236 return getRequiredTokens();
237 }
238
239 @Override
240 public int[] getAcceptableTokens() {
241 return getRequiredTokens();
242 }
243
244 @Override
245 public int[] getRequiredTokens() {
246 return new int[] {
247 TokenTypes.STRING_LITERAL,
248 TokenTypes.CHAR_LITERAL,
249 TokenTypes.TEXT_BLOCK_CONTENT,
250 };
251 }
252
253
254 @Override
255 @SuppressWarnings("deprecation")
256 public void beginTree(DetailAST rootAST) {
257 singlelineComments = getFileContents().getSingleLineComments();
258 blockComments = getFileContents().getBlockComments();
259 }
260
261 @Override
262 public void visitToken(DetailAST ast) {
263 final String literal =
264 CheckUtil.stripIndentAndInitialNewLineFromTextBlock(ast.getText());
265
266 if (hasUnicodeChar(literal) && !(allowByTailComment && hasTrailComment(ast)
267 || isAllCharactersEscaped(literal)
268 || allowEscapesForControlCharacters
269 && isOnlyUnicodeValidChars(literal, UNICODE_CONTROL)
270 || allowNonPrintableEscapes
271 && isOnlyUnicodeValidChars(literal, NON_PRINTABLE_CHARS))) {
272 log(ast, MSG_KEY);
273 }
274 }
275
276
277
278
279
280
281
282 private static boolean hasUnicodeChar(String literal) {
283 final String literalWithoutEscapedBackslashes =
284 ESCAPED_BACKSLASH.matcher(literal).replaceAll("");
285 return UNICODE_REGEXP.matcher(literalWithoutEscapedBackslashes).find();
286 }
287
288
289
290
291
292
293
294
295 private static boolean isOnlyUnicodeValidChars(String literal, Pattern pattern) {
296 final int unicodeMatchesCounter =
297 countMatches(UNICODE_REGEXP, literal);
298 final int unicodeValidMatchesCounter =
299 countMatches(pattern, literal);
300 return unicodeMatchesCounter - unicodeValidMatchesCounter == 0;
301 }
302
303
304
305
306
307
308
309 private boolean hasTrailComment(DetailAST ast) {
310 int lineNo = ast.getLineNo();
311
312
313
314 if (ast.getType() == TokenTypes.TEXT_BLOCK_CONTENT) {
315 lineNo = ast.getNextSibling().getLineNo();
316 }
317 boolean result = false;
318 if (singlelineComments.containsKey(lineNo)) {
319 result = true;
320 }
321 else {
322 final List<TextBlock> commentList = blockComments.get(lineNo);
323 if (commentList != null) {
324 final TextBlock comment = commentList.get(commentList.size() - 1);
325 final int[] codePoints = getLineCodePoints(lineNo - 1);
326 result = isTrailingBlockComment(comment, codePoints);
327 }
328 }
329 return result;
330 }
331
332
333
334
335
336
337
338
339 private static boolean isTrailingBlockComment(TextBlock comment, int... codePoints) {
340 return comment.getText().length != 1
341 || CodePointUtil.isBlank(Arrays.copyOfRange(codePoints,
342 comment.getEndColNo() + 1, codePoints.length));
343 }
344
345
346
347
348
349
350
351
352 private static int countMatches(Pattern pattern, String target) {
353 int matcherCounter = 0;
354 final Matcher matcher = pattern.matcher(target);
355 while (matcher.find()) {
356 matcherCounter++;
357 }
358 return matcherCounter;
359 }
360
361
362
363
364
365
366
367 private boolean isAllCharactersEscaped(String literal) {
368 return allowIfAllCharactersEscaped
369 && ALL_ESCAPED_CHARS.matcher(literal).find();
370 }
371
372 }