1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package com.puppycrawl.tools.checkstyle.checks;
21
22 import static com.google.common.truth.Truth.assertWithMessage;
23 import static com.puppycrawl.tools.checkstyle.checks.AvoidEscapedUnicodeCharactersCheck.MSG_KEY;
24
25 import java.util.Arrays;
26 import java.util.List;
27 import java.util.regex.Matcher;
28 import java.util.regex.Pattern;
29 import java.util.stream.IntStream;
30
31 import org.junit.jupiter.api.Test;
32
33 import com.google.common.base.Splitter;
34 import com.puppycrawl.tools.checkstyle.AbstractModuleTestSupport;
35 import com.puppycrawl.tools.checkstyle.api.TokenTypes;
36 import com.puppycrawl.tools.checkstyle.internal.utils.TestUtil;
37
38 public class AvoidEscapedUnicodeCharactersCheckTest extends AbstractModuleTestSupport {
39
40
41
42 private static final int[] C0_CONTROL_CHARACTER_INDICES = {
43 0x0000,
44 0x0001,
45 0x0002,
46 0x0003,
47 0x0004,
48 0x0005,
49 0x0006,
50 0x0007,
51 0x0008,
52 0x0009,
53 0x000A,
54 0x000B,
55 0x000C,
56 0x000D,
57 0x000E,
58 0x000F,
59 0x0010,
60 0x0011,
61 0x0012,
62 0x0013,
63 0x0014,
64 0x0015,
65 0x0016,
66 0x0017,
67 0x0018,
68 0x0019,
69 0x001A,
70 0x001B,
71 0x001C,
72 0x001D,
73 0x001E,
74 0x001F,
75 };
76
77
78
79 private static final int[] C1_CONTROL_CHARACTER_INDICES = {
80 0x0080,
81 0x0081,
82 0x0082,
83 0x0083,
84 0x0084,
85 0x0085,
86 0x0086,
87 0x0087,
88 0x0088,
89 0x0089,
90 0x008A,
91 0x008B,
92 0x008C,
93 0x008D,
94 0x008E,
95 0x008F,
96 0x0090,
97 0x0091,
98 0x0092,
99 0x0093,
100 0x0094,
101 0x0095,
102 0x0096,
103 0x0097,
104 0x0098,
105 0x0099,
106 0x009A,
107 0x009B,
108 0x009C,
109 0x009D,
110 0x009E,
111 0x009F,
112 };
113
114
115
116 private static final int[] OTHER_CONTROL_CHARACTER_INDICES = {
117 0x00AD,
118 0x034F,
119 0x070F,
120 0x180E,
121 0x200B,
122 0x200C,
123 0x200D,
124 0x200E,
125 0x200F,
126 0x202A,
127 0x202B,
128 0x202C,
129 0x202D,
130 0x202E,
131 0x2060,
132 0x2061,
133 0x2062,
134 0x2063,
135 0x2064,
136 0x206A,
137 0x206B,
138 0x206C,
139 0x206D,
140 0x206E,
141 0x206F,
142 0xFEFF,
143 0xFFF9,
144 0xFFFA,
145 0xFFFB,
146 };
147
148 @Override
149 protected String getPackageLocation() {
150 return "com/puppycrawl/tools/checkstyle/checks/avoidescapedunicodecharacters";
151 }
152
153 @Test
154 public void testGetRequiredTokens() {
155 final AvoidEscapedUnicodeCharactersCheck checkObj =
156 new AvoidEscapedUnicodeCharactersCheck();
157 final int[] expected = {
158 TokenTypes.STRING_LITERAL,
159 TokenTypes.CHAR_LITERAL,
160 TokenTypes.TEXT_BLOCK_CONTENT,
161 };
162 assertWithMessage("Required tokens differ from expected")
163 .that(checkObj.getRequiredTokens())
164 .isEqualTo(expected);
165 }
166
167 @Test
168 public void testDefault() throws Exception {
169 final String[] expected = {
170 "17:38: " + getCheckMessage(MSG_KEY),
171 "19:38: " + getCheckMessage(MSG_KEY),
172 "21:38: " + getCheckMessage(MSG_KEY),
173 "25:38: " + getCheckMessage(MSG_KEY),
174 "26:38: " + getCheckMessage(MSG_KEY),
175 "30:24: " + getCheckMessage(MSG_KEY),
176 "34:36: " + getCheckMessage(MSG_KEY),
177 "35:36: " + getCheckMessage(MSG_KEY),
178 "37:24: " + getCheckMessage(MSG_KEY),
179 "41:38: " + getCheckMessage(MSG_KEY),
180 "42:38: " + getCheckMessage(MSG_KEY),
181 "43:38: " + getCheckMessage(MSG_KEY),
182 "44:47: " + getCheckMessage(MSG_KEY),
183 "52:32: " + getCheckMessage(MSG_KEY),
184 "69:35: " + getCheckMessage(MSG_KEY),
185 "70:35: " + getCheckMessage(MSG_KEY),
186 "71:35: " + getCheckMessage(MSG_KEY),
187 "72:35: " + getCheckMessage(MSG_KEY),
188 "82:24: " + getCheckMessage(MSG_KEY),
189 "83:24: " + getCheckMessage(MSG_KEY),
190 "84:24: " + getCheckMessage(MSG_KEY),
191 "85:24: " + getCheckMessage(MSG_KEY),
192 "86:24: " + getCheckMessage(MSG_KEY),
193 "87:24: " + getCheckMessage(MSG_KEY),
194 "89:24: " + getCheckMessage(MSG_KEY),
195 "92:31: " + getCheckMessage(MSG_KEY),
196 "92:48: " + getCheckMessage(MSG_KEY),
197 "96:38: " + getCheckMessage(MSG_KEY),
198 "97:38: " + getCheckMessage(MSG_KEY),
199 "98:38: " + getCheckMessage(MSG_KEY),
200 "99:38: " + getCheckMessage(MSG_KEY),
201 "102:31: " + getCheckMessage(MSG_KEY),
202 "102:45: " + getCheckMessage(MSG_KEY),
203 "103:34: " + getCheckMessage(MSG_KEY),
204 "104:46: " + getCheckMessage(MSG_KEY),
205 "108:38: " + getCheckMessage(MSG_KEY),
206 "114:38: " + getCheckMessage(MSG_KEY),
207 "116:46: " + getCheckMessage(MSG_KEY),
208 "117:55: " + getCheckMessage(MSG_KEY),
209 "118:46: " + getCheckMessage(MSG_KEY),
210 "119:55: " + getCheckMessage(MSG_KEY),
211 "120:46: " + getCheckMessage(MSG_KEY),
212 "121:55: " + getCheckMessage(MSG_KEY),
213 "122:46: " + getCheckMessage(MSG_KEY),
214 "123:55: " + getCheckMessage(MSG_KEY),
215 "124:46: " + getCheckMessage(MSG_KEY),
216 "125:55: " + getCheckMessage(MSG_KEY),
217 "126:48: " + getCheckMessage(MSG_KEY),
218 "127:57: " + getCheckMessage(MSG_KEY),
219 };
220 verifyWithInlineConfigParser(
221 getPath("InputAvoidEscapedUnicodeCharacters.java"), expected);
222 }
223
224 @Test
225 public void testAllowEscapesForControlCharacterSet() throws Exception {
226 final String[] expected = {
227 "17:38: " + getCheckMessage(MSG_KEY),
228 "19:38: " + getCheckMessage(MSG_KEY),
229 "21:38: " + getCheckMessage(MSG_KEY),
230 "25:38: " + getCheckMessage(MSG_KEY),
231 "26:38: " + getCheckMessage(MSG_KEY),
232 "34:36: " + getCheckMessage(MSG_KEY),
233 "35:36: " + getCheckMessage(MSG_KEY),
234 "41:38: " + getCheckMessage(MSG_KEY),
235 "42:38: " + getCheckMessage(MSG_KEY),
236 "43:38: " + getCheckMessage(MSG_KEY),
237 "44:47: " + getCheckMessage(MSG_KEY),
238 "52:32: " + getCheckMessage(MSG_KEY),
239 "69:35: " + getCheckMessage(MSG_KEY),
240 "70:35: " + getCheckMessage(MSG_KEY),
241 "71:35: " + getCheckMessage(MSG_KEY),
242 "72:35: " + getCheckMessage(MSG_KEY),
243 "83:24: " + getCheckMessage(MSG_KEY),
244 "84:24: " + getCheckMessage(MSG_KEY),
245 "85:24: " + getCheckMessage(MSG_KEY),
246 "86:24: " + getCheckMessage(MSG_KEY),
247 "87:24: " + getCheckMessage(MSG_KEY),
248 "89:24: " + getCheckMessage(MSG_KEY),
249 "92:31: " + getCheckMessage(MSG_KEY),
250 "92:48: " + getCheckMessage(MSG_KEY),
251 "96:38: " + getCheckMessage(MSG_KEY),
252 "97:38: " + getCheckMessage(MSG_KEY),
253 "98:38: " + getCheckMessage(MSG_KEY),
254 "99:38: " + getCheckMessage(MSG_KEY),
255 "102:45: " + getCheckMessage(MSG_KEY),
256 "104:46: " + getCheckMessage(MSG_KEY),
257 "108:38: " + getCheckMessage(MSG_KEY),
258 "114:38: " + getCheckMessage(MSG_KEY),
259 "116:46: " + getCheckMessage(MSG_KEY),
260 "117:55: " + getCheckMessage(MSG_KEY),
261 "118:46: " + getCheckMessage(MSG_KEY),
262 "119:55: " + getCheckMessage(MSG_KEY),
263 "120:46: " + getCheckMessage(MSG_KEY),
264 "121:55: " + getCheckMessage(MSG_KEY),
265 "122:46: " + getCheckMessage(MSG_KEY),
266 "123:55: " + getCheckMessage(MSG_KEY),
267 "124:46: " + getCheckMessage(MSG_KEY),
268 "125:55: " + getCheckMessage(MSG_KEY),
269 "126:48: " + getCheckMessage(MSG_KEY),
270 "127:57: " + getCheckMessage(MSG_KEY),
271 };
272 verifyWithInlineConfigParser(
273 getPath("InputAvoidEscapedUnicodeCharacters1.java"), expected);
274 }
275
276 @Test
277 public void testAllowByTailComment() throws Exception {
278 final String[] expected = {
279 "17:38: " + getCheckMessage(MSG_KEY),
280 "25:38: " + getCheckMessage(MSG_KEY),
281 "34:36: " + getCheckMessage(MSG_KEY),
282 "41:38: " + getCheckMessage(MSG_KEY),
283 "43:38: " + getCheckMessage(MSG_KEY),
284 "44:47: " + getCheckMessage(MSG_KEY),
285 "69:35: " + getCheckMessage(MSG_KEY),
286 "71:35: " + getCheckMessage(MSG_KEY),
287 "73:35: " + getCheckMessage(MSG_KEY),
288 "75:35: " + getCheckMessage(MSG_KEY),
289 "86:24: " + getCheckMessage(MSG_KEY),
290 "88:24: " + getCheckMessage(MSG_KEY),
291 "90:24: " + getCheckMessage(MSG_KEY),
292 "92:24: " + getCheckMessage(MSG_KEY),
293 "94:24: " + getCheckMessage(MSG_KEY),
294 "97:24: " + getCheckMessage(MSG_KEY),
295 "101:31: " + getCheckMessage(MSG_KEY),
296 "101:48: " + getCheckMessage(MSG_KEY),
297 "111:31: " + getCheckMessage(MSG_KEY),
298 "111:45: " + getCheckMessage(MSG_KEY),
299 "117:38: " + getCheckMessage(MSG_KEY),
300 "123:38: " + getCheckMessage(MSG_KEY),
301 "125:46: " + getCheckMessage(MSG_KEY),
302 "128:46: " + getCheckMessage(MSG_KEY),
303 "131:46: " + getCheckMessage(MSG_KEY),
304 "134:46: " + getCheckMessage(MSG_KEY),
305 "137:46: " + getCheckMessage(MSG_KEY),
306 "140:48: " + getCheckMessage(MSG_KEY),
307 };
308 verifyWithInlineConfigParser(
309 getPath("InputAvoidEscapedUnicodeCharacters2.java"), expected);
310 }
311
312 @Test
313 public void testAllowAllCharactersEscaped() throws Exception {
314 final String[] expected = {
315 "17:38: " + getCheckMessage(MSG_KEY),
316 "19:38: " + getCheckMessage(MSG_KEY),
317 "21:38: " + getCheckMessage(MSG_KEY),
318 "25:38: " + getCheckMessage(MSG_KEY),
319 "26:38: " + getCheckMessage(MSG_KEY),
320 "41:38: " + getCheckMessage(MSG_KEY),
321 "42:38: " + getCheckMessage(MSG_KEY),
322 "43:38: " + getCheckMessage(MSG_KEY),
323 "52:32: " + getCheckMessage(MSG_KEY),
324 "96:38: " + getCheckMessage(MSG_KEY),
325 "97:38: " + getCheckMessage(MSG_KEY),
326 "98:38: " + getCheckMessage(MSG_KEY),
327 "99:38: " + getCheckMessage(MSG_KEY),
328 "108:38: " + getCheckMessage(MSG_KEY),
329 "116:46: " + getCheckMessage(MSG_KEY),
330 "117:55: " + getCheckMessage(MSG_KEY),
331 "118:46: " + getCheckMessage(MSG_KEY),
332 "119:55: " + getCheckMessage(MSG_KEY),
333 "120:46: " + getCheckMessage(MSG_KEY),
334 "121:55: " + getCheckMessage(MSG_KEY),
335 "122:46: " + getCheckMessage(MSG_KEY),
336 "123:55: " + getCheckMessage(MSG_KEY),
337 "124:46: " + getCheckMessage(MSG_KEY),
338 "125:55: " + getCheckMessage(MSG_KEY),
339 "126:48: " + getCheckMessage(MSG_KEY),
340 "127:57: " + getCheckMessage(MSG_KEY),
341 };
342 verifyWithInlineConfigParser(
343 getPath("InputAvoidEscapedUnicodeCharacters3.java"), expected);
344 }
345
346 @Test
347 public void allowNonPrintableEscapesOne() throws Exception {
348 final String[] expected = {
349 "17:38: " + getCheckMessage(MSG_KEY),
350 "19:38: " + getCheckMessage(MSG_KEY),
351 "21:38: " + getCheckMessage(MSG_KEY),
352 "25:38: " + getCheckMessage(MSG_KEY),
353 "26:38: " + getCheckMessage(MSG_KEY),
354 "34:36: " + getCheckMessage(MSG_KEY),
355 "35:36: " + getCheckMessage(MSG_KEY),
356 "41:38: " + getCheckMessage(MSG_KEY),
357 "42:38: " + getCheckMessage(MSG_KEY),
358 "43:38: " + getCheckMessage(MSG_KEY),
359 "44:47: " + getCheckMessage(MSG_KEY),
360 "52:32: " + getCheckMessage(MSG_KEY),
361 };
362 verifyWithInlineConfigParser(
363 getPath("InputAvoidEscapedUnicodeCharacters4One.java"), expected);
364 }
365
366 @Test
367 public void allowNonPrintableEscapesTwo() throws Exception {
368 final String[] expected = {
369 "17:38: " + getCheckMessage(MSG_KEY),
370 "18:38: " + getCheckMessage(MSG_KEY),
371 "19:38: " + getCheckMessage(MSG_KEY),
372 "20:38: " + getCheckMessage(MSG_KEY),
373 "24:34: " + getCheckMessage(MSG_KEY),
374 "25:46: " + getCheckMessage(MSG_KEY),
375 "29:38: " + getCheckMessage(MSG_KEY),
376 "35:38: " + getCheckMessage(MSG_KEY),
377 "37:46: " + getCheckMessage(MSG_KEY),
378 "38:55: " + getCheckMessage(MSG_KEY),
379 "39:46: " + getCheckMessage(MSG_KEY),
380 "40:55: " + getCheckMessage(MSG_KEY),
381 "41:46: " + getCheckMessage(MSG_KEY),
382 "42:55: " + getCheckMessage(MSG_KEY),
383 "43:46: " + getCheckMessage(MSG_KEY),
384 "44:55: " + getCheckMessage(MSG_KEY),
385 "45:46: " + getCheckMessage(MSG_KEY),
386 "46:55: " + getCheckMessage(MSG_KEY),
387 "47:48: " + getCheckMessage(MSG_KEY),
388 "48:57: " + getCheckMessage(MSG_KEY),
389 };
390 verifyWithInlineConfigParser(
391 getPath("InputAvoidEscapedUnicodeCharacters4Two.java"), expected);
392 }
393
394 @Test
395 public void testAllowByTailCommentWithEmoji() throws Exception {
396 final String[] expected = {
397 "15:24: " + getCheckMessage(MSG_KEY),
398 "18:24: " + getCheckMessage(MSG_KEY),
399 "22:30: " + getCheckMessage(MSG_KEY),
400 "32:18: " + getCheckMessage(MSG_KEY),
401 "35:18: " + getCheckMessage(MSG_KEY),
402 "37:18: " + getCheckMessage(MSG_KEY),
403 "40:18: " + getCheckMessage(MSG_KEY),
404 };
405 verifyWithInlineConfigParser(
406 getPath("InputAvoidEscapedUnicodeCharacters5.java"), expected);
407 }
408
409 @Test
410 public void testAvoidEscapedUnicodeCharactersTextBlocksAllowByComment() throws Exception {
411 final String[] expected = {
412 "18:30: " + getCheckMessage(MSG_KEY),
413 "20:30: " + getCheckMessage(MSG_KEY),
414 "22:30: " + getCheckMessage(MSG_KEY),
415 "25:39: " + getCheckMessage(MSG_KEY),
416 "30:33: " + getCheckMessage(MSG_KEY),
417 "33:33: " + getCheckMessage(MSG_KEY),
418 "36:33: " + getCheckMessage(MSG_KEY),
419 "41:42: " + getCheckMessage(MSG_KEY),
420 };
421 verifyWithInlineConfigParser(
422 getPath(
423 "InputAvoidEscapedUnicodeCharactersTextBlocksAllowByComment.java"),
424 expected);
425 }
426
427 @Test
428 public void testAvoidEscapedUnicodeCharactersTextBlocks() throws Exception {
429 final String[] expected = {
430 "17:30: " + getCheckMessage(MSG_KEY),
431 "18:30: " + getCheckMessage(MSG_KEY),
432 "19:30: " + getCheckMessage(MSG_KEY),
433 "20:39: " + getCheckMessage(MSG_KEY),
434 "24:33: " + getCheckMessage(MSG_KEY),
435 "26:33: " + getCheckMessage(MSG_KEY),
436 "28:33: " + getCheckMessage(MSG_KEY),
437 "30:42: " + getCheckMessage(MSG_KEY),
438 };
439 verifyWithInlineConfigParser(
440 getPath("InputAvoidEscapedUnicodeCharactersTextBlocks.java"),
441 expected);
442 }
443
444 @Test
445 public void testAvoidEscapedUnicodeCharactersEscapedS() throws Exception {
446 final String[] expected = {
447 "17:21: " + getCheckMessage(MSG_KEY),
448 "18:22: " + getCheckMessage(MSG_KEY),
449 "27:39: " + getCheckMessage(MSG_KEY),
450 "30:39: " + getCheckMessage(MSG_KEY),
451 "33:39: " + getCheckMessage(MSG_KEY),
452 "36:22: " + getCheckMessage(MSG_KEY),
453 };
454 verifyWithInlineConfigParser(
455 getPath("InputAvoidEscapedUnicodeCharactersEscapedS.java"),
456 expected);
457 }
458
459 @Test
460 public void testGetAcceptableTokens() {
461 final AvoidEscapedUnicodeCharactersCheck check = new AvoidEscapedUnicodeCharactersCheck();
462 final int[] actual = check.getAcceptableTokens();
463 final int[] expected = {
464 TokenTypes.STRING_LITERAL,
465 TokenTypes.CHAR_LITERAL,
466 TokenTypes.TEXT_BLOCK_CONTENT,
467 };
468 assertWithMessage("Acceptable tokens differ from expected")
469 .that(actual)
470 .isEqualTo(expected);
471 }
472
473 @Test
474 public void testAllowEscapesForControlCharacterSetForAllCharacters() throws Exception {
475
476 final int indexOfStartLineInInputFile = 16;
477 final String message = getCheckMessage(MSG_KEY);
478 final String[] expected = IntStream.rangeClosed(0, 0xFFFF)
479 .parallel()
480 .filter(val -> !isControlCharacter(val))
481 .mapToObj(msg -> indexOfStartLineInInputFile + msg + ":54: " + message)
482 .toArray(String[]::new);
483 verifyWithInlineConfigParser(
484 getPath("InputAvoidEscapedUnicodeCharactersAllEscapedUnicodeCharacters.java"),
485 expected);
486 }
487
488
489
490
491
492
493
494
495
496
497
498 @Test
499 public void testCountMatches() throws Exception {
500 final AvoidEscapedUnicodeCharactersCheck check = new AvoidEscapedUnicodeCharactersCheck();
501 final int actual = TestUtil.invokeMethod(check, "countMatches",
502 Pattern.compile("\\\\u[a-fA-F\\d]{4}"), "\\u1234");
503 assertWithMessage("Unexpected matches count")
504 .that(actual)
505 .isEqualTo(1);
506 }
507
508
509
510
511
512 @Test
513 public void testNonPrintableCharsAreSorted() {
514 String expression = TestUtil.getInternalStaticState(
515 AvoidEscapedUnicodeCharactersCheck.class,
516 "NON_PRINTABLE_CHARS", Pattern.class).pattern();
517
518
519 final String[] charExpressions = {"Aa", "Bb", "Cc", "Dd", "Ee", "Ff"};
520 for (String charExpression : charExpressions) {
521 final String regex = "\\[[" + charExpression + "]{2}]";
522 expression = expression.replaceAll(regex, charExpression.substring(0, 1));
523 }
524
525
526 for (int i = 4; i > 1; i--) {
527 final String regex = "([A-F])\\{" + i + "}";
528 String replacement = "$1$1{" + (i - 1) + "}";
529 if (i == 2) {
530 replacement = "$1$1";
531 }
532 expression = expression.replaceAll(regex, replacement);
533 }
534
535
536 final List<String> expressionParts = Splitter.on("|").splitToList(expression);
537 final Pattern unicodeCharPattern = Pattern.compile("^\\\\\\\\u[\\dA-F]{4}$");
538 String lastChar = null;
539 for (int i = 0; i < expressionParts.size(); i++) {
540 final String currentChar = expressionParts.get(i);
541 final Matcher matcher = unicodeCharPattern.matcher(currentChar);
542 if (!matcher.matches()) {
543 final String message = "Character '" + currentChar + "' (at position " + i
544 + ") doesn't match the pattern";
545 assertWithMessage(message)
546 .that(matcher.matches())
547 .isTrue();
548 }
549 if (lastChar != null) {
550 final String message = "Character '" + lastChar + "' should be after '"
551 + currentChar + "', position: " + i;
552 assertWithMessage(message)
553 .that(lastChar.compareTo(currentChar) < 0)
554 .isTrue();
555 }
556 lastChar = currentChar;
557 }
558 }
559
560 private static boolean isControlCharacter(final int character) {
561 return Arrays.binarySearch(C0_CONTROL_CHARACTER_INDICES, character) >= 0
562 || Arrays.binarySearch(C1_CONTROL_CHARACTER_INDICES, character) >= 0
563 || Arrays.binarySearch(OTHER_CONTROL_CHARACTER_INDICES, character) >= 0;
564 }
565
566 }