1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package com.puppycrawl.tools.checkstyle.checks;
21
22 import static com.google.common.truth.Truth.assertWithMessage;
23 import static com.puppycrawl.tools.checkstyle.checks.AvoidEscapedUnicodeCharactersCheck.MSG_KEY;
24
25 import java.util.Arrays;
26 import java.util.regex.Matcher;
27 import java.util.regex.Pattern;
28 import java.util.stream.IntStream;
29
30 import org.junit.jupiter.api.Test;
31
32 import com.puppycrawl.tools.checkstyle.AbstractModuleTestSupport;
33 import com.puppycrawl.tools.checkstyle.api.TokenTypes;
34 import com.puppycrawl.tools.checkstyle.internal.utils.TestUtil;
35
36 public class AvoidEscapedUnicodeCharactersCheckTest extends AbstractModuleTestSupport {
37
38
39
40 private static final int[] C0_CONTROL_CHARACTER_INDICES = {
41 0x0000,
42 0x0001,
43 0x0002,
44 0x0003,
45 0x0004,
46 0x0005,
47 0x0006,
48 0x0007,
49 0x0008,
50 0x0009,
51 0x000a,
52 0x000b,
53 0x000c,
54 0x000d,
55 0x000e,
56 0x000f,
57 0x0010,
58 0x0011,
59 0x0012,
60 0x0013,
61 0x0014,
62 0x0015,
63 0x0016,
64 0x0017,
65 0x0018,
66 0x0019,
67 0x001a,
68 0x001b,
69 0x001c,
70 0x001d,
71 0x001e,
72 0x001f,
73 };
74
75
76
77 private static final int[] C1_CONTROL_CHARACTER_INDICES = {
78 0x0080,
79 0x0081,
80 0x0082,
81 0x0083,
82 0x0084,
83 0x0085,
84 0x0086,
85 0x0087,
86 0x0088,
87 0x0089,
88 0x008a,
89 0x008b,
90 0x008c,
91 0x008d,
92 0x008e,
93 0x008f,
94 0x0090,
95 0x0091,
96 0x0092,
97 0x0093,
98 0x0094,
99 0x0095,
100 0x0096,
101 0x0097,
102 0x0098,
103 0x0099,
104 0x009a,
105 0x009b,
106 0x009c,
107 0x009d,
108 0x009e,
109 0x009f,
110 };
111
112
113
114 private static final int[] OTHER_CONTROL_CHARACTER_INDICES = {
115 0x00ad,
116 0x034f,
117 0x070f,
118 0x180e,
119 0x200b,
120 0x200c,
121 0x200d,
122 0x200e,
123 0x200f,
124 0x202a,
125 0x202b,
126 0x202c,
127 0x202d,
128 0x202e,
129 0x2060,
130 0x2061,
131 0x2062,
132 0x2063,
133 0x2064,
134 0x206a,
135 0x206b,
136 0x206c,
137 0x206d,
138 0x206e,
139 0x206f,
140 0xfeff,
141 0xfff9,
142 0xfffa,
143 0xfffb,
144 };
145
146 @Override
147 protected String getPackageLocation() {
148 return "com/puppycrawl/tools/checkstyle/checks/avoidescapedunicodecharacters";
149 }
150
151 @Test
152 public void testGetRequiredTokens() {
153 final AvoidEscapedUnicodeCharactersCheck checkObj =
154 new AvoidEscapedUnicodeCharactersCheck();
155 final int[] expected = {
156 TokenTypes.STRING_LITERAL,
157 TokenTypes.CHAR_LITERAL,
158 TokenTypes.TEXT_BLOCK_CONTENT,
159 };
160 assertWithMessage("Required tokens differ from expected")
161 .that(checkObj.getRequiredTokens())
162 .isEqualTo(expected);
163 }
164
165 @Test
166 public void testDefault() throws Exception {
167 final String[] expected = {
168 "17:38: " + getCheckMessage(MSG_KEY),
169 "19:38: " + getCheckMessage(MSG_KEY),
170 "21:38: " + getCheckMessage(MSG_KEY),
171 "25:38: " + getCheckMessage(MSG_KEY),
172 "26:38: " + getCheckMessage(MSG_KEY),
173 "30:24: " + getCheckMessage(MSG_KEY),
174 "34:36: " + getCheckMessage(MSG_KEY),
175 "35:36: " + getCheckMessage(MSG_KEY),
176 "37:24: " + getCheckMessage(MSG_KEY),
177 "41:38: " + getCheckMessage(MSG_KEY),
178 "42:38: " + getCheckMessage(MSG_KEY),
179 "43:38: " + getCheckMessage(MSG_KEY),
180 "44:47: " + getCheckMessage(MSG_KEY),
181 "52:32: " + getCheckMessage(MSG_KEY),
182 "69:35: " + getCheckMessage(MSG_KEY),
183 "70:35: " + getCheckMessage(MSG_KEY),
184 "71:35: " + getCheckMessage(MSG_KEY),
185 "72:35: " + getCheckMessage(MSG_KEY),
186 "82:24: " + getCheckMessage(MSG_KEY),
187 "83:24: " + getCheckMessage(MSG_KEY),
188 "84:24: " + getCheckMessage(MSG_KEY),
189 "85:24: " + getCheckMessage(MSG_KEY),
190 "86:24: " + getCheckMessage(MSG_KEY),
191 "87:24: " + getCheckMessage(MSG_KEY),
192 "89:24: " + getCheckMessage(MSG_KEY),
193 "92:31: " + getCheckMessage(MSG_KEY),
194 "92:48: " + getCheckMessage(MSG_KEY),
195 "96:38: " + getCheckMessage(MSG_KEY),
196 "97:38: " + getCheckMessage(MSG_KEY),
197 "98:38: " + getCheckMessage(MSG_KEY),
198 "99:38: " + getCheckMessage(MSG_KEY),
199 "102:31: " + getCheckMessage(MSG_KEY),
200 "102:45: " + getCheckMessage(MSG_KEY),
201 "103:34: " + getCheckMessage(MSG_KEY),
202 "104:46: " + getCheckMessage(MSG_KEY),
203 "108:38: " + getCheckMessage(MSG_KEY),
204 "114:38: " + getCheckMessage(MSG_KEY),
205 "116:46: " + getCheckMessage(MSG_KEY),
206 "117:55: " + getCheckMessage(MSG_KEY),
207 "118:46: " + getCheckMessage(MSG_KEY),
208 "119:55: " + getCheckMessage(MSG_KEY),
209 "120:46: " + getCheckMessage(MSG_KEY),
210 "121:55: " + getCheckMessage(MSG_KEY),
211 "122:46: " + getCheckMessage(MSG_KEY),
212 "123:55: " + getCheckMessage(MSG_KEY),
213 "124:46: " + getCheckMessage(MSG_KEY),
214 "125:55: " + getCheckMessage(MSG_KEY),
215 "126:48: " + getCheckMessage(MSG_KEY),
216 "127:57: " + getCheckMessage(MSG_KEY),
217 };
218 verifyWithInlineConfigParser(
219 getPath("InputAvoidEscapedUnicodeCharacters.java"), expected);
220 }
221
222 @Test
223 public void testAllowEscapesForControlCharacterSet() throws Exception {
224 final String[] expected = {
225 "17:38: " + getCheckMessage(MSG_KEY),
226 "19:38: " + getCheckMessage(MSG_KEY),
227 "21:38: " + getCheckMessage(MSG_KEY),
228 "25:38: " + getCheckMessage(MSG_KEY),
229 "26:38: " + getCheckMessage(MSG_KEY),
230 "34:36: " + getCheckMessage(MSG_KEY),
231 "35:36: " + getCheckMessage(MSG_KEY),
232 "41:38: " + getCheckMessage(MSG_KEY),
233 "42:38: " + getCheckMessage(MSG_KEY),
234 "43:38: " + getCheckMessage(MSG_KEY),
235 "44:47: " + getCheckMessage(MSG_KEY),
236 "52:32: " + getCheckMessage(MSG_KEY),
237 "69:35: " + getCheckMessage(MSG_KEY),
238 "70:35: " + getCheckMessage(MSG_KEY),
239 "71:35: " + getCheckMessage(MSG_KEY),
240 "72:35: " + getCheckMessage(MSG_KEY),
241 "83:24: " + getCheckMessage(MSG_KEY),
242 "84:24: " + getCheckMessage(MSG_KEY),
243 "85:24: " + getCheckMessage(MSG_KEY),
244 "86:24: " + getCheckMessage(MSG_KEY),
245 "87:24: " + getCheckMessage(MSG_KEY),
246 "89:24: " + getCheckMessage(MSG_KEY),
247 "92:31: " + getCheckMessage(MSG_KEY),
248 "92:48: " + getCheckMessage(MSG_KEY),
249 "96:38: " + getCheckMessage(MSG_KEY),
250 "97:38: " + getCheckMessage(MSG_KEY),
251 "98:38: " + getCheckMessage(MSG_KEY),
252 "99:38: " + getCheckMessage(MSG_KEY),
253 "102:45: " + getCheckMessage(MSG_KEY),
254 "104:46: " + getCheckMessage(MSG_KEY),
255 "108:38: " + getCheckMessage(MSG_KEY),
256 "114:38: " + getCheckMessage(MSG_KEY),
257 "116:46: " + getCheckMessage(MSG_KEY),
258 "117:55: " + getCheckMessage(MSG_KEY),
259 "118:46: " + getCheckMessage(MSG_KEY),
260 "119:55: " + getCheckMessage(MSG_KEY),
261 "120:46: " + getCheckMessage(MSG_KEY),
262 "121:55: " + getCheckMessage(MSG_KEY),
263 "122:46: " + getCheckMessage(MSG_KEY),
264 "123:55: " + getCheckMessage(MSG_KEY),
265 "124:46: " + getCheckMessage(MSG_KEY),
266 "125:55: " + getCheckMessage(MSG_KEY),
267 "126:48: " + getCheckMessage(MSG_KEY),
268 "127:57: " + getCheckMessage(MSG_KEY),
269 };
270 verifyWithInlineConfigParser(
271 getPath("InputAvoidEscapedUnicodeCharacters1.java"), expected);
272 }
273
274 @Test
275 public void testAllowByTailComment() throws Exception {
276 final String[] expected = {
277 "17:38: " + getCheckMessage(MSG_KEY),
278 "25:38: " + getCheckMessage(MSG_KEY),
279 "34:36: " + getCheckMessage(MSG_KEY),
280 "41:38: " + getCheckMessage(MSG_KEY),
281 "43:38: " + getCheckMessage(MSG_KEY),
282 "44:47: " + getCheckMessage(MSG_KEY),
283 "69:35: " + getCheckMessage(MSG_KEY),
284 "71:35: " + getCheckMessage(MSG_KEY),
285 "73:35: " + getCheckMessage(MSG_KEY),
286 "75:35: " + getCheckMessage(MSG_KEY),
287 "86:24: " + getCheckMessage(MSG_KEY),
288 "88:24: " + getCheckMessage(MSG_KEY),
289 "90:24: " + getCheckMessage(MSG_KEY),
290 "92:24: " + getCheckMessage(MSG_KEY),
291 "94:24: " + getCheckMessage(MSG_KEY),
292 "97:24: " + getCheckMessage(MSG_KEY),
293 "101:31: " + getCheckMessage(MSG_KEY),
294 "101:48: " + getCheckMessage(MSG_KEY),
295 "111:31: " + getCheckMessage(MSG_KEY),
296 "111:45: " + getCheckMessage(MSG_KEY),
297 "117:38: " + getCheckMessage(MSG_KEY),
298 "123:38: " + getCheckMessage(MSG_KEY),
299 "125:46: " + getCheckMessage(MSG_KEY),
300 "128:46: " + getCheckMessage(MSG_KEY),
301 "131:46: " + getCheckMessage(MSG_KEY),
302 "134:46: " + getCheckMessage(MSG_KEY),
303 "137:46: " + getCheckMessage(MSG_KEY),
304 "140:48: " + getCheckMessage(MSG_KEY),
305 };
306 verifyWithInlineConfigParser(
307 getPath("InputAvoidEscapedUnicodeCharacters2.java"), expected);
308 }
309
310 @Test
311 public void testAllowAllCharactersEscaped() throws Exception {
312 final String[] expected = {
313 "17:38: " + getCheckMessage(MSG_KEY),
314 "19:38: " + getCheckMessage(MSG_KEY),
315 "21:38: " + getCheckMessage(MSG_KEY),
316 "25:38: " + getCheckMessage(MSG_KEY),
317 "26:38: " + getCheckMessage(MSG_KEY),
318 "41:38: " + getCheckMessage(MSG_KEY),
319 "42:38: " + getCheckMessage(MSG_KEY),
320 "43:38: " + getCheckMessage(MSG_KEY),
321 "52:32: " + getCheckMessage(MSG_KEY),
322 "96:38: " + getCheckMessage(MSG_KEY),
323 "97:38: " + getCheckMessage(MSG_KEY),
324 "98:38: " + getCheckMessage(MSG_KEY),
325 "99:38: " + getCheckMessage(MSG_KEY),
326 "108:38: " + getCheckMessage(MSG_KEY),
327 "116:46: " + getCheckMessage(MSG_KEY),
328 "117:55: " + getCheckMessage(MSG_KEY),
329 "118:46: " + getCheckMessage(MSG_KEY),
330 "119:55: " + getCheckMessage(MSG_KEY),
331 "120:46: " + getCheckMessage(MSG_KEY),
332 "121:55: " + getCheckMessage(MSG_KEY),
333 "122:46: " + getCheckMessage(MSG_KEY),
334 "123:55: " + getCheckMessage(MSG_KEY),
335 "124:46: " + getCheckMessage(MSG_KEY),
336 "125:55: " + getCheckMessage(MSG_KEY),
337 "126:48: " + getCheckMessage(MSG_KEY),
338 "127:57: " + getCheckMessage(MSG_KEY),
339 };
340 verifyWithInlineConfigParser(
341 getPath("InputAvoidEscapedUnicodeCharacters3.java"), expected);
342 }
343
344 @Test
345 public void allowNonPrintableEscapesOne() throws Exception {
346 final String[] expected = {
347 "17:38: " + getCheckMessage(MSG_KEY),
348 "19:38: " + getCheckMessage(MSG_KEY),
349 "21:38: " + getCheckMessage(MSG_KEY),
350 "25:38: " + getCheckMessage(MSG_KEY),
351 "26:38: " + getCheckMessage(MSG_KEY),
352 "34:36: " + getCheckMessage(MSG_KEY),
353 "35:36: " + getCheckMessage(MSG_KEY),
354 "41:38: " + getCheckMessage(MSG_KEY),
355 "42:38: " + getCheckMessage(MSG_KEY),
356 "43:38: " + getCheckMessage(MSG_KEY),
357 "44:47: " + getCheckMessage(MSG_KEY),
358 "52:32: " + getCheckMessage(MSG_KEY),
359 };
360 verifyWithInlineConfigParser(
361 getPath("InputAvoidEscapedUnicodeCharacters4One.java"), expected);
362 }
363
364 @Test
365 public void allowNonPrintableEscapesTwo() throws Exception {
366 final String[] expected = {
367 "17:38: " + getCheckMessage(MSG_KEY),
368 "18:38: " + getCheckMessage(MSG_KEY),
369 "19:38: " + getCheckMessage(MSG_KEY),
370 "20:38: " + getCheckMessage(MSG_KEY),
371 "24:34: " + getCheckMessage(MSG_KEY),
372 "25:46: " + getCheckMessage(MSG_KEY),
373 "29:38: " + getCheckMessage(MSG_KEY),
374 "35:38: " + getCheckMessage(MSG_KEY),
375 "37:46: " + getCheckMessage(MSG_KEY),
376 "38:55: " + getCheckMessage(MSG_KEY),
377 "39:46: " + getCheckMessage(MSG_KEY),
378 "40:55: " + getCheckMessage(MSG_KEY),
379 "41:46: " + getCheckMessage(MSG_KEY),
380 "42:55: " + getCheckMessage(MSG_KEY),
381 "43:46: " + getCheckMessage(MSG_KEY),
382 "44:55: " + getCheckMessage(MSG_KEY),
383 "45:46: " + getCheckMessage(MSG_KEY),
384 "46:55: " + getCheckMessage(MSG_KEY),
385 "47:48: " + getCheckMessage(MSG_KEY),
386 "48:57: " + getCheckMessage(MSG_KEY),
387 };
388 verifyWithInlineConfigParser(
389 getPath("InputAvoidEscapedUnicodeCharacters4Two.java"), expected);
390 }
391
392 @Test
393 public void testAllowByTailCommentWithEmoji() throws Exception {
394 final String[] expected = {
395 "15:24: " + getCheckMessage(MSG_KEY),
396 "18:24: " + getCheckMessage(MSG_KEY),
397 "22:30: " + getCheckMessage(MSG_KEY),
398 "32:18: " + getCheckMessage(MSG_KEY),
399 "35:18: " + getCheckMessage(MSG_KEY),
400 "37:18: " + getCheckMessage(MSG_KEY),
401 "40:18: " + getCheckMessage(MSG_KEY),
402 };
403 verifyWithInlineConfigParser(
404 getPath("InputAvoidEscapedUnicodeCharacters5.java"), expected);
405 }
406
407 @Test
408 public void testAvoidEscapedUnicodeCharactersTextBlocksAllowByComment() throws Exception {
409 final String[] expected = {
410 "18:30: " + getCheckMessage(MSG_KEY),
411 "20:30: " + getCheckMessage(MSG_KEY),
412 "22:30: " + getCheckMessage(MSG_KEY),
413 "25:39: " + getCheckMessage(MSG_KEY),
414 "30:33: " + getCheckMessage(MSG_KEY),
415 "33:33: " + getCheckMessage(MSG_KEY),
416 "36:33: " + getCheckMessage(MSG_KEY),
417 "41:42: " + getCheckMessage(MSG_KEY),
418 };
419 verifyWithInlineConfigParser(
420 getNonCompilablePath(
421 "InputAvoidEscapedUnicodeCharactersTextBlocksAllowByComment.java"),
422 expected);
423 }
424
425 @Test
426 public void testAvoidEscapedUnicodeCharactersTextBlocks() throws Exception {
427 final String[] expected = {
428 "17:30: " + getCheckMessage(MSG_KEY),
429 "18:30: " + getCheckMessage(MSG_KEY),
430 "19:30: " + getCheckMessage(MSG_KEY),
431 "20:39: " + getCheckMessage(MSG_KEY),
432 "24:33: " + getCheckMessage(MSG_KEY),
433 "26:33: " + getCheckMessage(MSG_KEY),
434 "28:33: " + getCheckMessage(MSG_KEY),
435 "30:42: " + getCheckMessage(MSG_KEY),
436 };
437 verifyWithInlineConfigParser(
438 getNonCompilablePath("InputAvoidEscapedUnicodeCharactersTextBlocks.java"),
439 expected);
440 }
441
442 @Test
443 public void testAvoidEscapedUnicodeCharactersEscapedS() throws Exception {
444 final String[] expected = {
445 "17:21: " + getCheckMessage(MSG_KEY),
446 "18:22: " + getCheckMessage(MSG_KEY),
447 "27:39: " + getCheckMessage(MSG_KEY),
448 "30:39: " + getCheckMessage(MSG_KEY),
449 "33:39: " + getCheckMessage(MSG_KEY),
450 "36:22: " + getCheckMessage(MSG_KEY),
451 };
452 verifyWithInlineConfigParser(
453 getNonCompilablePath("InputAvoidEscapedUnicodeCharactersEscapedS.java"),
454 expected);
455 }
456
457 @Test
458 public void testGetAcceptableTokens() {
459 final AvoidEscapedUnicodeCharactersCheck check = new AvoidEscapedUnicodeCharactersCheck();
460 final int[] actual = check.getAcceptableTokens();
461 final int[] expected = {
462 TokenTypes.STRING_LITERAL,
463 TokenTypes.CHAR_LITERAL,
464 TokenTypes.TEXT_BLOCK_CONTENT,
465 };
466 assertWithMessage("Acceptable tokens differ from expected")
467 .that(actual)
468 .isEqualTo(expected);
469 }
470
471 @Test
472 public void testAllowEscapesForControlCharacterSetForAllCharacters() throws Exception {
473
474 final int indexOfStartLineInInputFile = 16;
475 final String message = getCheckMessage(MSG_KEY);
476 final String[] expected = IntStream.rangeClosed(0, 0xffff)
477 .parallel()
478 .filter(val -> !isControlCharacter(val))
479 .mapToObj(msg -> indexOfStartLineInInputFile + msg + ":54: " + message)
480 .toArray(String[]::new);
481 verifyWithInlineConfigParser(
482 getPath("InputAvoidEscapedUnicodeCharactersAllEscapedUnicodeCharacters.java"),
483 expected);
484 }
485
486
487
488
489
490
491
492
493
494
495
496 @Test
497 public void testCountMatches() throws Exception {
498 final AvoidEscapedUnicodeCharactersCheck check = new AvoidEscapedUnicodeCharactersCheck();
499 final int actual = TestUtil.invokeMethod(check, "countMatches",
500 Pattern.compile("\\\\u[a-fA-F\\d]{4}"), "\\u1234");
501 assertWithMessage("Unexpected matches count")
502 .that(actual)
503 .isEqualTo(1);
504 }
505
506
507
508
509
510 @Test
511 public void testNonPrintableCharsAreSorted() {
512 String expression = TestUtil.<Pattern>getInternalStaticState(
513 AvoidEscapedUnicodeCharactersCheck.class, "NON_PRINTABLE_CHARS").pattern();
514
515
516 final String[] charExpressions = {"Aa", "Bb", "Cc", "Dd", "Ee", "Ff"};
517 for (String charExpression : charExpressions) {
518 final String regex = "\\[[" + charExpression + "]{2}]";
519 expression = expression.replaceAll(regex, charExpression.substring(0, 1));
520 }
521
522
523 for (int i = 4; i > 1; i--) {
524 final String regex = "([A-F])\\{" + i + "}";
525 String replacement = "$1$1{" + (i - 1) + "}";
526 if (i == 2) {
527 replacement = "$1$1";
528 }
529 expression = expression.replaceAll(regex, replacement);
530 }
531
532
533 final String[] expressionParts = expression.split("\\|");
534 final Pattern unicodeCharPattern = Pattern.compile("^\\\\\\\\u[\\dA-F]{4}$");
535 String lastChar = null;
536 for (int i = 0; i < expressionParts.length; i++) {
537 final String currentChar = expressionParts[i];
538 final Matcher matcher = unicodeCharPattern.matcher(currentChar);
539 if (!matcher.matches()) {
540 final String message = "Character '" + currentChar + "' (at position " + i
541 + ") doesn't match the pattern";
542 assertWithMessage(message)
543 .that(matcher.matches())
544 .isTrue();
545 }
546 if (lastChar != null) {
547 final String message = "Character '" + lastChar + "' should be after '"
548 + currentChar + "', position: " + i;
549 assertWithMessage(message)
550 .that(lastChar.compareTo(currentChar) < 0)
551 .isTrue();
552 }
553 lastChar = currentChar;
554 }
555 }
556
557 private static boolean isControlCharacter(final int character) {
558 return Arrays.binarySearch(C0_CONTROL_CHARACTER_INDICES, character) >= 0
559 || Arrays.binarySearch(C1_CONTROL_CHARACTER_INDICES, character) >= 0
560 || Arrays.binarySearch(OTHER_CONTROL_CHARACTER_INDICES, character) >= 0;
561 }
562
563 }