1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package com.puppycrawl.tools.checkstyle.checks;
21
22 import static com.google.common.truth.Truth.assertWithMessage;
23 import static com.puppycrawl.tools.checkstyle.checks.AvoidEscapedUnicodeCharactersCheck.MSG_KEY;
24
25 import java.util.Arrays;
26 import java.util.regex.Matcher;
27 import java.util.regex.Pattern;
28 import java.util.stream.IntStream;
29
30 import org.junit.jupiter.api.Test;
31
32 import com.puppycrawl.tools.checkstyle.AbstractModuleTestSupport;
33 import com.puppycrawl.tools.checkstyle.api.TokenTypes;
34 import com.puppycrawl.tools.checkstyle.internal.utils.TestUtil;
35
36 public class AvoidEscapedUnicodeCharactersCheckTest extends AbstractModuleTestSupport {
37
38
39
40 private static final int[] C0_CONTROL_CHARACTER_INDICES = {
41 0x0000,
42 0x0001,
43 0x0002,
44 0x0003,
45 0x0004,
46 0x0005,
47 0x0006,
48 0x0007,
49 0x0008,
50 0x0009,
51 0x000a,
52 0x000b,
53 0x000c,
54 0x000d,
55 0x000e,
56 0x000f,
57 0x0010,
58 0x0011,
59 0x0012,
60 0x0013,
61 0x0014,
62 0x0015,
63 0x0016,
64 0x0017,
65 0x0018,
66 0x0019,
67 0x001a,
68 0x001b,
69 0x001c,
70 0x001d,
71 0x001e,
72 0x001f,
73 };
74
75
76
77 private static final int[] C1_CONTROL_CHARACTER_INDICES = {
78 0x0080,
79 0x0081,
80 0x0082,
81 0x0083,
82 0x0084,
83 0x0085,
84 0x0086,
85 0x0087,
86 0x0088,
87 0x0089,
88 0x008a,
89 0x008b,
90 0x008c,
91 0x008d,
92 0x008e,
93 0x008f,
94 0x0090,
95 0x0091,
96 0x0092,
97 0x0093,
98 0x0094,
99 0x0095,
100 0x0096,
101 0x0097,
102 0x0098,
103 0x0099,
104 0x009a,
105 0x009b,
106 0x009c,
107 0x009d,
108 0x009e,
109 0x009f,
110 };
111
112
113
114 private static final int[] OTHER_CONTROL_CHARACTER_INDICES = {
115 0x00ad,
116 0x034f,
117 0x070f,
118 0x180e,
119 0x200b,
120 0x200c,
121 0x200d,
122 0x200e,
123 0x200f,
124 0x202a,
125 0x202b,
126 0x202c,
127 0x202d,
128 0x202e,
129 0x2060,
130 0x2061,
131 0x2062,
132 0x2063,
133 0x2064,
134 0x206a,
135 0x206b,
136 0x206c,
137 0x206d,
138 0x206e,
139 0x206f,
140 0xfeff,
141 0xfff9,
142 0xfffa,
143 0xfffb,
144 };
145
146 @Override
147 protected String getPackageLocation() {
148 return "com/puppycrawl/tools/checkstyle/checks/avoidescapedunicodecharacters";
149 }
150
151 @Test
152 public void testGetRequiredTokens() {
153 final AvoidEscapedUnicodeCharactersCheck checkObj =
154 new AvoidEscapedUnicodeCharactersCheck();
155 final int[] expected = {
156 TokenTypes.STRING_LITERAL,
157 TokenTypes.CHAR_LITERAL,
158 TokenTypes.TEXT_BLOCK_CONTENT,
159 };
160 assertWithMessage("Required tokens differ from expected")
161 .that(checkObj.getRequiredTokens())
162 .isEqualTo(expected);
163 }
164
165 @Test
166 public void testDefault() throws Exception {
167 final String[] expected = {
168 "17:38: " + getCheckMessage(MSG_KEY),
169 "19:38: " + getCheckMessage(MSG_KEY),
170 "21:38: " + getCheckMessage(MSG_KEY),
171 "25:38: " + getCheckMessage(MSG_KEY),
172 "26:38: " + getCheckMessage(MSG_KEY),
173 "30:24: " + getCheckMessage(MSG_KEY),
174 "34:36: " + getCheckMessage(MSG_KEY),
175 "35:36: " + getCheckMessage(MSG_KEY),
176 "37:24: " + getCheckMessage(MSG_KEY),
177 "41:38: " + getCheckMessage(MSG_KEY),
178 "42:38: " + getCheckMessage(MSG_KEY),
179 "43:38: " + getCheckMessage(MSG_KEY),
180 "44:47: " + getCheckMessage(MSG_KEY),
181 "52:32: " + getCheckMessage(MSG_KEY),
182 "69:35: " + getCheckMessage(MSG_KEY),
183 "70:35: " + getCheckMessage(MSG_KEY),
184 "71:35: " + getCheckMessage(MSG_KEY),
185 "72:35: " + getCheckMessage(MSG_KEY),
186 "82:24: " + getCheckMessage(MSG_KEY),
187 "83:24: " + getCheckMessage(MSG_KEY),
188 "84:24: " + getCheckMessage(MSG_KEY),
189 "85:24: " + getCheckMessage(MSG_KEY),
190 "86:24: " + getCheckMessage(MSG_KEY),
191 "87:24: " + getCheckMessage(MSG_KEY),
192 "89:24: " + getCheckMessage(MSG_KEY),
193 "92:31: " + getCheckMessage(MSG_KEY),
194 "92:48: " + getCheckMessage(MSG_KEY),
195 "96:38: " + getCheckMessage(MSG_KEY),
196 "97:38: " + getCheckMessage(MSG_KEY),
197 "98:38: " + getCheckMessage(MSG_KEY),
198 "99:38: " + getCheckMessage(MSG_KEY),
199 "102:31: " + getCheckMessage(MSG_KEY),
200 "102:45: " + getCheckMessage(MSG_KEY),
201 "103:34: " + getCheckMessage(MSG_KEY),
202 "104:46: " + getCheckMessage(MSG_KEY),
203 "108:38: " + getCheckMessage(MSG_KEY),
204 "114:38: " + getCheckMessage(MSG_KEY),
205 "116:46: " + getCheckMessage(MSG_KEY),
206 "117:55: " + getCheckMessage(MSG_KEY),
207 "118:46: " + getCheckMessage(MSG_KEY),
208 "119:55: " + getCheckMessage(MSG_KEY),
209 "120:46: " + getCheckMessage(MSG_KEY),
210 "121:55: " + getCheckMessage(MSG_KEY),
211 "122:46: " + getCheckMessage(MSG_KEY),
212 "123:55: " + getCheckMessage(MSG_KEY),
213 "124:46: " + getCheckMessage(MSG_KEY),
214 "125:55: " + getCheckMessage(MSG_KEY),
215 "126:48: " + getCheckMessage(MSG_KEY),
216 "127:57: " + getCheckMessage(MSG_KEY),
217 };
218 verifyWithInlineConfigParser(
219 getPath("InputAvoidEscapedUnicodeCharacters.java"), expected);
220 }
221
222 @Test
223 public void testAllowEscapesForControlCharacterSet() throws Exception {
224 final String[] expected = {
225 "17:38: " + getCheckMessage(MSG_KEY),
226 "19:38: " + getCheckMessage(MSG_KEY),
227 "21:38: " + getCheckMessage(MSG_KEY),
228 "25:38: " + getCheckMessage(MSG_KEY),
229 "26:38: " + getCheckMessage(MSG_KEY),
230 "34:36: " + getCheckMessage(MSG_KEY),
231 "35:36: " + getCheckMessage(MSG_KEY),
232 "41:38: " + getCheckMessage(MSG_KEY),
233 "42:38: " + getCheckMessage(MSG_KEY),
234 "43:38: " + getCheckMessage(MSG_KEY),
235 "44:47: " + getCheckMessage(MSG_KEY),
236 "52:32: " + getCheckMessage(MSG_KEY),
237 "69:35: " + getCheckMessage(MSG_KEY),
238 "70:35: " + getCheckMessage(MSG_KEY),
239 "71:35: " + getCheckMessage(MSG_KEY),
240 "72:35: " + getCheckMessage(MSG_KEY),
241 "83:24: " + getCheckMessage(MSG_KEY),
242 "84:24: " + getCheckMessage(MSG_KEY),
243 "85:24: " + getCheckMessage(MSG_KEY),
244 "86:24: " + getCheckMessage(MSG_KEY),
245 "87:24: " + getCheckMessage(MSG_KEY),
246 "89:24: " + getCheckMessage(MSG_KEY),
247 "92:31: " + getCheckMessage(MSG_KEY),
248 "92:48: " + getCheckMessage(MSG_KEY),
249 "96:38: " + getCheckMessage(MSG_KEY),
250 "97:38: " + getCheckMessage(MSG_KEY),
251 "98:38: " + getCheckMessage(MSG_KEY),
252 "99:38: " + getCheckMessage(MSG_KEY),
253 "102:45: " + getCheckMessage(MSG_KEY),
254 "104:46: " + getCheckMessage(MSG_KEY),
255 "108:38: " + getCheckMessage(MSG_KEY),
256 "114:38: " + getCheckMessage(MSG_KEY),
257 "116:46: " + getCheckMessage(MSG_KEY),
258 "117:55: " + getCheckMessage(MSG_KEY),
259 "118:46: " + getCheckMessage(MSG_KEY),
260 "119:55: " + getCheckMessage(MSG_KEY),
261 "120:46: " + getCheckMessage(MSG_KEY),
262 "121:55: " + getCheckMessage(MSG_KEY),
263 "122:46: " + getCheckMessage(MSG_KEY),
264 "123:55: " + getCheckMessage(MSG_KEY),
265 "124:46: " + getCheckMessage(MSG_KEY),
266 "125:55: " + getCheckMessage(MSG_KEY),
267 "126:48: " + getCheckMessage(MSG_KEY),
268 "127:57: " + getCheckMessage(MSG_KEY),
269 };
270 verifyWithInlineConfigParser(
271 getPath("InputAvoidEscapedUnicodeCharacters1.java"), expected);
272 }
273
274 @Test
275 public void testAllowByTailComment() throws Exception {
276 final String[] expected = {
277 "17:38: " + getCheckMessage(MSG_KEY),
278 "25:38: " + getCheckMessage(MSG_KEY),
279 "34:36: " + getCheckMessage(MSG_KEY),
280 "41:38: " + getCheckMessage(MSG_KEY),
281 "43:38: " + getCheckMessage(MSG_KEY),
282 "44:47: " + getCheckMessage(MSG_KEY),
283 "69:35: " + getCheckMessage(MSG_KEY),
284 "71:35: " + getCheckMessage(MSG_KEY),
285 "73:35: " + getCheckMessage(MSG_KEY),
286 "75:35: " + getCheckMessage(MSG_KEY),
287 "86:24: " + getCheckMessage(MSG_KEY),
288 "88:24: " + getCheckMessage(MSG_KEY),
289 "90:24: " + getCheckMessage(MSG_KEY),
290 "92:24: " + getCheckMessage(MSG_KEY),
291 "94:24: " + getCheckMessage(MSG_KEY),
292 "97:24: " + getCheckMessage(MSG_KEY),
293 "101:31: " + getCheckMessage(MSG_KEY),
294 "101:48: " + getCheckMessage(MSG_KEY),
295 "111:31: " + getCheckMessage(MSG_KEY),
296 "111:45: " + getCheckMessage(MSG_KEY),
297 "117:38: " + getCheckMessage(MSG_KEY),
298 "123:38: " + getCheckMessage(MSG_KEY),
299 "125:46: " + getCheckMessage(MSG_KEY),
300 "128:46: " + getCheckMessage(MSG_KEY),
301 "131:46: " + getCheckMessage(MSG_KEY),
302 "134:46: " + getCheckMessage(MSG_KEY),
303 "137:46: " + getCheckMessage(MSG_KEY),
304 "140:48: " + getCheckMessage(MSG_KEY),
305 };
306 verifyWithInlineConfigParser(
307 getPath("InputAvoidEscapedUnicodeCharacters2.java"), expected);
308 }
309
310 @Test
311 public void testAllowAllCharactersEscaped() throws Exception {
312 final String[] expected = {
313 "17:38: " + getCheckMessage(MSG_KEY),
314 "19:38: " + getCheckMessage(MSG_KEY),
315 "21:38: " + getCheckMessage(MSG_KEY),
316 "25:38: " + getCheckMessage(MSG_KEY),
317 "26:38: " + getCheckMessage(MSG_KEY),
318 "41:38: " + getCheckMessage(MSG_KEY),
319 "42:38: " + getCheckMessage(MSG_KEY),
320 "43:38: " + getCheckMessage(MSG_KEY),
321 "52:32: " + getCheckMessage(MSG_KEY),
322 "96:38: " + getCheckMessage(MSG_KEY),
323 "97:38: " + getCheckMessage(MSG_KEY),
324 "98:38: " + getCheckMessage(MSG_KEY),
325 "99:38: " + getCheckMessage(MSG_KEY),
326 "108:38: " + getCheckMessage(MSG_KEY),
327 "116:46: " + getCheckMessage(MSG_KEY),
328 "117:55: " + getCheckMessage(MSG_KEY),
329 "118:46: " + getCheckMessage(MSG_KEY),
330 "119:55: " + getCheckMessage(MSG_KEY),
331 "120:46: " + getCheckMessage(MSG_KEY),
332 "121:55: " + getCheckMessage(MSG_KEY),
333 "122:46: " + getCheckMessage(MSG_KEY),
334 "123:55: " + getCheckMessage(MSG_KEY),
335 "124:46: " + getCheckMessage(MSG_KEY),
336 "125:55: " + getCheckMessage(MSG_KEY),
337 "126:48: " + getCheckMessage(MSG_KEY),
338 "127:57: " + getCheckMessage(MSG_KEY),
339 };
340 verifyWithInlineConfigParser(
341 getPath("InputAvoidEscapedUnicodeCharacters3.java"), expected);
342 }
343
344 @Test
345 public void allowNonPrintableEscapes() throws Exception {
346 final String[] expected = {
347 "17:38: " + getCheckMessage(MSG_KEY),
348 "19:38: " + getCheckMessage(MSG_KEY),
349 "21:38: " + getCheckMessage(MSG_KEY),
350 "25:38: " + getCheckMessage(MSG_KEY),
351 "26:38: " + getCheckMessage(MSG_KEY),
352 "34:36: " + getCheckMessage(MSG_KEY),
353 "35:36: " + getCheckMessage(MSG_KEY),
354 "41:38: " + getCheckMessage(MSG_KEY),
355 "42:38: " + getCheckMessage(MSG_KEY),
356 "43:38: " + getCheckMessage(MSG_KEY),
357 "44:47: " + getCheckMessage(MSG_KEY),
358 "52:32: " + getCheckMessage(MSG_KEY),
359 "96:38: " + getCheckMessage(MSG_KEY),
360 "97:38: " + getCheckMessage(MSG_KEY),
361 "98:38: " + getCheckMessage(MSG_KEY),
362 "99:38: " + getCheckMessage(MSG_KEY),
363 "103:34: " + getCheckMessage(MSG_KEY),
364 "104:46: " + getCheckMessage(MSG_KEY),
365 "108:38: " + getCheckMessage(MSG_KEY),
366 "114:38: " + getCheckMessage(MSG_KEY),
367 "116:46: " + getCheckMessage(MSG_KEY),
368 "117:55: " + getCheckMessage(MSG_KEY),
369 "118:46: " + getCheckMessage(MSG_KEY),
370 "119:55: " + getCheckMessage(MSG_KEY),
371 "120:46: " + getCheckMessage(MSG_KEY),
372 "121:55: " + getCheckMessage(MSG_KEY),
373 "122:46: " + getCheckMessage(MSG_KEY),
374 "123:55: " + getCheckMessage(MSG_KEY),
375 "124:46: " + getCheckMessage(MSG_KEY),
376 "125:55: " + getCheckMessage(MSG_KEY),
377 "126:48: " + getCheckMessage(MSG_KEY),
378 "127:57: " + getCheckMessage(MSG_KEY),
379 };
380 verifyWithInlineConfigParser(
381 getPath("InputAvoidEscapedUnicodeCharacters4.java"), expected);
382 }
383
384 @Test
385 public void testAllowByTailCommentWithEmoji() throws Exception {
386 final String[] expected = {
387 "15:24: " + getCheckMessage(MSG_KEY),
388 "18:24: " + getCheckMessage(MSG_KEY),
389 "22:30: " + getCheckMessage(MSG_KEY),
390 "32:18: " + getCheckMessage(MSG_KEY),
391 "35:18: " + getCheckMessage(MSG_KEY),
392 "37:18: " + getCheckMessage(MSG_KEY),
393 "40:18: " + getCheckMessage(MSG_KEY),
394 };
395 verifyWithInlineConfigParser(
396 getPath("InputAvoidEscapedUnicodeCharacters5.java"), expected);
397 }
398
399 @Test
400 public void testAvoidEscapedUnicodeCharactersTextBlocksAllowByComment() throws Exception {
401 final String[] expected = {
402 "18:30: " + getCheckMessage(MSG_KEY),
403 "20:30: " + getCheckMessage(MSG_KEY),
404 "22:30: " + getCheckMessage(MSG_KEY),
405 "25:39: " + getCheckMessage(MSG_KEY),
406 "30:33: " + getCheckMessage(MSG_KEY),
407 "33:33: " + getCheckMessage(MSG_KEY),
408 "36:33: " + getCheckMessage(MSG_KEY),
409 "41:42: " + getCheckMessage(MSG_KEY),
410 };
411 verifyWithInlineConfigParser(
412 getNonCompilablePath(
413 "InputAvoidEscapedUnicodeCharactersTextBlocksAllowByComment.java"),
414 expected);
415 }
416
417 @Test
418 public void testAvoidEscapedUnicodeCharactersTextBlocks() throws Exception {
419 final String[] expected = {
420 "17:30: " + getCheckMessage(MSG_KEY),
421 "18:30: " + getCheckMessage(MSG_KEY),
422 "19:30: " + getCheckMessage(MSG_KEY),
423 "20:39: " + getCheckMessage(MSG_KEY),
424 "24:33: " + getCheckMessage(MSG_KEY),
425 "26:33: " + getCheckMessage(MSG_KEY),
426 "28:33: " + getCheckMessage(MSG_KEY),
427 "30:42: " + getCheckMessage(MSG_KEY),
428 };
429 verifyWithInlineConfigParser(
430 getNonCompilablePath("InputAvoidEscapedUnicodeCharactersTextBlocks.java"),
431 expected);
432 }
433
434 @Test
435 public void testAvoidEscapedUnicodeCharactersEscapedS() throws Exception {
436 final String[] expected = {
437 "17:21: " + getCheckMessage(MSG_KEY),
438 "18:22: " + getCheckMessage(MSG_KEY),
439 "27:39: " + getCheckMessage(MSG_KEY),
440 "30:39: " + getCheckMessage(MSG_KEY),
441 "33:39: " + getCheckMessage(MSG_KEY),
442 "36:22: " + getCheckMessage(MSG_KEY),
443 };
444 verifyWithInlineConfigParser(
445 getNonCompilablePath("InputAvoidEscapedUnicodeCharactersEscapedS.java"),
446 expected);
447 }
448
449 @Test
450 public void testGetAcceptableTokens() {
451 final AvoidEscapedUnicodeCharactersCheck check = new AvoidEscapedUnicodeCharactersCheck();
452 final int[] actual = check.getAcceptableTokens();
453 final int[] expected = {
454 TokenTypes.STRING_LITERAL,
455 TokenTypes.CHAR_LITERAL,
456 TokenTypes.TEXT_BLOCK_CONTENT,
457 };
458 assertWithMessage("Acceptable tokens differ from expected")
459 .that(actual)
460 .isEqualTo(expected);
461 }
462
463 @Test
464 public void testAllowEscapesForControlCharacterSetForAllCharacters() throws Exception {
465
466 final int indexOfStartLineInInputFile = 16;
467 final String message = getCheckMessage(MSG_KEY);
468 final String[] expected = IntStream.rangeClosed(0, 0xffff)
469 .parallel()
470 .filter(val -> !isControlCharacter(val))
471 .mapToObj(msg -> indexOfStartLineInInputFile + msg + ":54: " + message)
472 .toArray(String[]::new);
473 verifyWithInlineConfigParser(
474 getPath("InputAvoidEscapedUnicodeCharactersAllEscapedUnicodeCharacters.java"),
475 expected);
476 }
477
478
479
480
481
482
483
484
485
486
487
488 @Test
489 public void testCountMatches() throws Exception {
490 final AvoidEscapedUnicodeCharactersCheck check = new AvoidEscapedUnicodeCharactersCheck();
491 final int actual = TestUtil.invokeMethod(check, "countMatches",
492 Pattern.compile("\\\\u[a-fA-F\\d]{4}"), "\\u1234");
493 assertWithMessage("Unexpected matches count")
494 .that(actual)
495 .isEqualTo(1);
496 }
497
498
499
500
501
502 @Test
503 public void testNonPrintableCharsAreSorted() {
504 String expression = TestUtil.<Pattern>getInternalStaticState(
505 AvoidEscapedUnicodeCharactersCheck.class, "NON_PRINTABLE_CHARS").pattern();
506
507
508 final String[] charExpressions = {"Aa", "Bb", "Cc", "Dd", "Ee", "Ff"};
509 for (String charExpression : charExpressions) {
510 final String regex = "\\[[" + charExpression + "]{2}]";
511 expression = expression.replaceAll(regex, charExpression.substring(0, 1));
512 }
513
514
515 for (int i = 4; i > 1; i--) {
516 final String regex = "([A-F])\\{" + i + "}";
517 String replacement = "$1$1{" + (i - 1) + "}";
518 if (i == 2) {
519 replacement = "$1$1";
520 }
521 expression = expression.replaceAll(regex, replacement);
522 }
523
524
525 final String[] expressionParts = expression.split("\\|");
526 final Pattern unicodeCharPattern = Pattern.compile("^\\\\\\\\u[\\dA-F]{4}$");
527 String lastChar = null;
528 for (int i = 0; i < expressionParts.length; i++) {
529 final String currentChar = expressionParts[i];
530 final Matcher matcher = unicodeCharPattern.matcher(currentChar);
531 if (!matcher.matches()) {
532 final String message = "Character '" + currentChar + "' (at position " + i
533 + ") doesn't match the pattern";
534 assertWithMessage(message)
535 .that(matcher.matches())
536 .isTrue();
537 }
538 if (lastChar != null) {
539 final String message = "Character '" + lastChar + "' should be after '"
540 + currentChar + "', position: " + i;
541 assertWithMessage(message)
542 .that(lastChar.compareTo(currentChar) < 0)
543 .isTrue();
544 }
545 lastChar = currentChar;
546 }
547 }
548
549 private static boolean isControlCharacter(final int character) {
550 return Arrays.binarySearch(C0_CONTROL_CHARACTER_INDICES, character) >= 0
551 || Arrays.binarySearch(C1_CONTROL_CHARACTER_INDICES, character) >= 0
552 || Arrays.binarySearch(OTHER_CONTROL_CHARACTER_INDICES, character) >= 0;
553 }
554
555 }