View Javadoc
1   ///////////////////////////////////////////////////////////////////////////////////////////////
2   // checkstyle: Checks Java source code and other text files for adherence to a set of rules.
3   // Copyright (C) 2001-2026 the original author or authors.
4   //
5   // This library is free software; you can redistribute it and/or
6   // modify it under the terms of the GNU Lesser General Public
7   // License as published by the Free Software Foundation; either
8   // version 2.1 of the License, or (at your option) any later version.
9   //
10  // This library is distributed in the hope that it will be useful,
11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  // Lesser General Public License for more details.
14  //
15  // You should have received a copy of the GNU Lesser General Public
16  // License along with this library; if not, write to the Free Software
17  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  ///////////////////////////////////////////////////////////////////////////////////////////////
19  
20  package com.puppycrawl.tools.checkstyle.checks;
21  
22  import static com.google.common.truth.Truth.assertWithMessage;
23  import static com.puppycrawl.tools.checkstyle.checks.AvoidEscapedUnicodeCharactersCheck.MSG_KEY;
24  
25  import java.util.Arrays;
26  import java.util.List;
27  import java.util.regex.Matcher;
28  import java.util.regex.Pattern;
29  import java.util.stream.IntStream;
30  
31  import org.junit.jupiter.api.Test;
32  
33  import com.google.common.base.Splitter;
34  import com.puppycrawl.tools.checkstyle.AbstractModuleTestSupport;
35  import com.puppycrawl.tools.checkstyle.api.TokenTypes;
36  import com.puppycrawl.tools.checkstyle.internal.utils.TestUtil;
37  import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
38  
39  public class AvoidEscapedUnicodeCharactersCheckTest extends AbstractModuleTestSupport {
40  
41      // C0 (ASCII and derivatives)
42      // https://en.wiktionary.org/wiki/Appendix:Control_characters#C0_.28ASCII_and_derivatives.29
43      private static final int[] C0_CONTROL_CHARACTER_INDICES = {
44          0x0000,
45          0x0001,
46          0x0002,
47          0x0003,
48          0x0004,
49          0x0005,
50          0x0006,
51          0x0007,
52          0x0008,
53          0x0009,
54          0x000A,
55          0x000B,
56          0x000C,
57          0x000D,
58          0x000E,
59          0x000F,
60          0x0010,
61          0x0011,
62          0x0012,
63          0x0013,
64          0x0014,
65          0x0015,
66          0x0016,
67          0x0017,
68          0x0018,
69          0x0019,
70          0x001A,
71          0x001B,
72          0x001C,
73          0x001D,
74          0x001E,
75          0x001F,
76      };
77  
78      // C1 set
79      // https://en.wiktionary.org/wiki/Appendix:Control_characters#C1_set
80      private static final int[] C1_CONTROL_CHARACTER_INDICES = {
81          0x0080,
82          0x0081,
83          0x0082,
84          0x0083,
85          0x0084,
86          0x0085,
87          0x0086,
88          0x0087,
89          0x0088,
90          0x0089,
91          0x008A,
92          0x008B,
93          0x008C,
94          0x008D,
95          0x008E,
96          0x008F,
97          0x0090,
98          0x0091,
99          0x0092,
100         0x0093,
101         0x0094,
102         0x0095,
103         0x0096,
104         0x0097,
105         0x0098,
106         0x0099,
107         0x009A,
108         0x009B,
109         0x009C,
110         0x009D,
111         0x009E,
112         0x009F,
113     };
114 
115     // Other control characters which do not occur in the C0 or C1 sets
116     // https://en.wiktionary.org/wiki/Appendix:Control_characters#Unicode_control_characters
117     private static final int[] OTHER_CONTROL_CHARACTER_INDICES = {
118         0x00AD,
119         0x034F,
120         0x070F,
121         0x180E,
122         0x200B,
123         0x200C,
124         0x200D,
125         0x200E,
126         0x200F,
127         0x202A,
128         0x202B,
129         0x202C,
130         0x202D,
131         0x202E,
132         0x2060,
133         0x2061,
134         0x2062,
135         0x2063,
136         0x2064,
137         0x206A,
138         0x206B,
139         0x206C,
140         0x206D,
141         0x206E,
142         0x206F,
143         0xFEFF,
144         0xFFF9,
145         0xFFFA,
146         0xFFFB,
147     };
148 
149     @Override
150     public String getPackageLocation() {
151         return "com/puppycrawl/tools/checkstyle/checks/avoidescapedunicodecharacters";
152     }
153 
154     @Test
155     public void testGetRequiredTokens() {
156         final AvoidEscapedUnicodeCharactersCheck checkObj =
157             new AvoidEscapedUnicodeCharactersCheck();
158         final int[] expected = {
159             TokenTypes.STRING_LITERAL,
160             TokenTypes.CHAR_LITERAL,
161             TokenTypes.TEXT_BLOCK_CONTENT,
162             TokenTypes.SINGLE_LINE_COMMENT,
163             TokenTypes.BLOCK_COMMENT_BEGIN,
164         };
165         assertWithMessage("Required tokens differ from expected")
166             .that(checkObj.getRequiredTokens())
167             .isEqualTo(expected);
168     }
169 
170     @Test
171     public void testDefault() throws Exception {
172         final String[] expected = {
173             "17:38: " + getCheckMessage(MSG_KEY),
174             "19:38: " + getCheckMessage(MSG_KEY),
175             "21:38: " + getCheckMessage(MSG_KEY),
176             "25:38: " + getCheckMessage(MSG_KEY),
177             "27:38: " + getCheckMessage(MSG_KEY),
178             "32:24: " + getCheckMessage(MSG_KEY),
179             "37:36: " + getCheckMessage(MSG_KEY),
180             "39:36: " + getCheckMessage(MSG_KEY),
181             "42:24: " + getCheckMessage(MSG_KEY),
182             "47:38: " + getCheckMessage(MSG_KEY),
183             "49:38: " + getCheckMessage(MSG_KEY),
184             "51:38: " + getCheckMessage(MSG_KEY),
185             "53:47: " + getCheckMessage(MSG_KEY),
186             "62:32: " + getCheckMessage(MSG_KEY),
187             "80:35: " + getCheckMessage(MSG_KEY),
188             "82:35: " + getCheckMessage(MSG_KEY),
189             "84:35: " + getCheckMessage(MSG_KEY),
190             "86:35: " + getCheckMessage(MSG_KEY),
191             "97:24: " + getCheckMessage(MSG_KEY),
192             "98:24: " + getCheckMessage(MSG_KEY),
193             "99:24: " + getCheckMessage(MSG_KEY),
194             "100:24: " + getCheckMessage(MSG_KEY),
195             "101:24: " + getCheckMessage(MSG_KEY),
196             "102:24: " + getCheckMessage(MSG_KEY),
197             "104:24: " + getCheckMessage(MSG_KEY),
198             "107:31: " + getCheckMessage(MSG_KEY),
199             "107:48: " + getCheckMessage(MSG_KEY),
200             "114:38: " + getCheckMessage(MSG_KEY),
201             "116:38: " + getCheckMessage(MSG_KEY),
202             "118:38: " + getCheckMessage(MSG_KEY),
203             "120:38: " + getCheckMessage(MSG_KEY),
204             "124:31: " + getCheckMessage(MSG_KEY),
205             "124:45: " + getCheckMessage(MSG_KEY),
206             "128:34: " + getCheckMessage(MSG_KEY),
207             "130:46: " + getCheckMessage(MSG_KEY),
208             "135:38: " + getCheckMessage(MSG_KEY),
209             "142:38: " + getCheckMessage(MSG_KEY),
210             "145:46: " + getCheckMessage(MSG_KEY),
211             "147:55: " + getCheckMessage(MSG_KEY),
212             "149:46: " + getCheckMessage(MSG_KEY),
213             "151:55: " + getCheckMessage(MSG_KEY),
214             "153:46: " + getCheckMessage(MSG_KEY),
215             "155:55: " + getCheckMessage(MSG_KEY),
216             "157:46: " + getCheckMessage(MSG_KEY),
217             "159:55: " + getCheckMessage(MSG_KEY),
218             "161:46: " + getCheckMessage(MSG_KEY),
219             "163:55: " + getCheckMessage(MSG_KEY),
220             "165:48: " + getCheckMessage(MSG_KEY),
221             "167:57: " + getCheckMessage(MSG_KEY),
222         };
223         verifyWithInlineConfigParser(
224                 getPath("InputAvoidEscapedUnicodeCharacters.java"), expected);
225     }
226 
227     @Test
228     public void testAllowEscapesForControlCharacterSet() throws Exception {
229         final String[] expected = {
230             "17:38: " + getCheckMessage(MSG_KEY),
231             "19:38: " + getCheckMessage(MSG_KEY),
232             "21:38: " + getCheckMessage(MSG_KEY),
233             "25:38: " + getCheckMessage(MSG_KEY),
234             "27:38: " + getCheckMessage(MSG_KEY),
235             "36:36: " + getCheckMessage(MSG_KEY),
236             "38:36: " + getCheckMessage(MSG_KEY),
237             "45:38: " + getCheckMessage(MSG_KEY),
238             "47:38: " + getCheckMessage(MSG_KEY),
239             "49:38: " + getCheckMessage(MSG_KEY),
240             "51:47: " + getCheckMessage(MSG_KEY),
241             "60:32: " + getCheckMessage(MSG_KEY),
242             "78:35: " + getCheckMessage(MSG_KEY),
243             "80:35: " + getCheckMessage(MSG_KEY),
244             "82:35: " + getCheckMessage(MSG_KEY),
245             "84:35: " + getCheckMessage(MSG_KEY),
246             "96:24: " + getCheckMessage(MSG_KEY),
247             "97:24: " + getCheckMessage(MSG_KEY),
248             "98:24: " + getCheckMessage(MSG_KEY),
249             "99:24: " + getCheckMessage(MSG_KEY),
250             "100:24: " + getCheckMessage(MSG_KEY),
251             "102:24: " + getCheckMessage(MSG_KEY),
252             "105:31: " + getCheckMessage(MSG_KEY),
253             "105:48: " + getCheckMessage(MSG_KEY),
254             "112:38: " + getCheckMessage(MSG_KEY),
255             "113:38: " + getCheckMessage(MSG_KEY),
256             "115:38: " + getCheckMessage(MSG_KEY),
257             "117:38: " + getCheckMessage(MSG_KEY),
258             "121:45: " + getCheckMessage(MSG_KEY),
259             "124:46: " + getCheckMessage(MSG_KEY),
260             "129:38: " + getCheckMessage(MSG_KEY),
261             "136:38: " + getCheckMessage(MSG_KEY),
262             "138:46: " + getCheckMessage(MSG_KEY),
263             "140:55: " + getCheckMessage(MSG_KEY),
264             "142:46: " + getCheckMessage(MSG_KEY),
265             "144:55: " + getCheckMessage(MSG_KEY),
266             "146:46: " + getCheckMessage(MSG_KEY),
267             "148:55: " + getCheckMessage(MSG_KEY),
268             "150:46: " + getCheckMessage(MSG_KEY),
269             "152:55: " + getCheckMessage(MSG_KEY),
270             "154:46: " + getCheckMessage(MSG_KEY),
271             "156:55: " + getCheckMessage(MSG_KEY),
272             "158:48: " + getCheckMessage(MSG_KEY),
273             "160:57: " + getCheckMessage(MSG_KEY),
274         };
275         verifyWithInlineConfigParser(
276                 getPath("InputAvoidEscapedUnicodeCharacters1.java"), expected);
277     }
278 
279     @Test
280     public void testAllowByTailComment() throws Exception {
281         final String[] expected = {
282             "17:38: " + getCheckMessage(MSG_KEY),
283             "26:38: " + getCheckMessage(MSG_KEY),
284             "36:36: " + getCheckMessage(MSG_KEY),
285             "44:38: " + getCheckMessage(MSG_KEY),
286             "47:38: " + getCheckMessage(MSG_KEY),
287             "49:47: " + getCheckMessage(MSG_KEY),
288             "75:35: " + getCheckMessage(MSG_KEY),
289             "77:35: " + getCheckMessage(MSG_KEY),
290             "79:35: " + getCheckMessage(MSG_KEY),
291             "81:35: " + getCheckMessage(MSG_KEY),
292             "93:24: " + getCheckMessage(MSG_KEY),
293             "95:24: " + getCheckMessage(MSG_KEY),
294             "97:24: " + getCheckMessage(MSG_KEY),
295             "99:24: " + getCheckMessage(MSG_KEY),
296             "101:24: " + getCheckMessage(MSG_KEY),
297             "104:24: " + getCheckMessage(MSG_KEY),
298             "108:31: " + getCheckMessage(MSG_KEY),
299             "108:48: " + getCheckMessage(MSG_KEY),
300             "121:31: " + getCheckMessage(MSG_KEY),
301             "121:45: " + getCheckMessage(MSG_KEY),
302             "130:38: " + getCheckMessage(MSG_KEY),
303             "136:38: " + getCheckMessage(MSG_KEY),
304             "138:46: " + getCheckMessage(MSG_KEY),
305             "142:46: " + getCheckMessage(MSG_KEY),
306             "145:46: " + getCheckMessage(MSG_KEY),
307             "148:46: " + getCheckMessage(MSG_KEY),
308             "151:46: " + getCheckMessage(MSG_KEY),
309             "154:48: " + getCheckMessage(MSG_KEY),
310         };
311         verifyWithInlineConfigParser(
312                 getPath("InputAvoidEscapedUnicodeCharacters2.java"), expected);
313     }
314 
315     @Test
316     public void testAllowAllCharactersEscaped() throws Exception {
317         final String[] expected = {
318             "17:38: " + getCheckMessage(MSG_KEY),
319             "19:38: " + getCheckMessage(MSG_KEY),
320             "21:38: " + getCheckMessage(MSG_KEY),
321             "26:38: " + getCheckMessage(MSG_KEY),
322             "28:38: " + getCheckMessage(MSG_KEY),
323             "44:38: " + getCheckMessage(MSG_KEY),
324             "46:38: " + getCheckMessage(MSG_KEY),
325             "48:38: " + getCheckMessage(MSG_KEY),
326             "58:32: " + getCheckMessage(MSG_KEY),
327             "103:38: " + getCheckMessage(MSG_KEY),
328             "105:38: " + getCheckMessage(MSG_KEY),
329             "107:38: " + getCheckMessage(MSG_KEY),
330             "109:38: " + getCheckMessage(MSG_KEY),
331             "119:38: " + getCheckMessage(MSG_KEY),
332             "127:46: " + getCheckMessage(MSG_KEY),
333             "129:55: " + getCheckMessage(MSG_KEY),
334             "131:46: " + getCheckMessage(MSG_KEY),
335             "133:55: " + getCheckMessage(MSG_KEY),
336             "135:46: " + getCheckMessage(MSG_KEY),
337             "137:55: " + getCheckMessage(MSG_KEY),
338             "139:46: " + getCheckMessage(MSG_KEY),
339             "141:55: " + getCheckMessage(MSG_KEY),
340             "143:46: " + getCheckMessage(MSG_KEY),
341             "145:55: " + getCheckMessage(MSG_KEY),
342             "147:48: " + getCheckMessage(MSG_KEY),
343             "149:57: " + getCheckMessage(MSG_KEY),
344         };
345         verifyWithInlineConfigParser(
346                 getPath("InputAvoidEscapedUnicodeCharacters3.java"), expected);
347     }
348 
349     @Test
350     public void allowNonPrintableEscapesOne() throws Exception {
351         final String[] expected = {
352             "17:38: " + getCheckMessage(MSG_KEY),
353             "19:38: " + getCheckMessage(MSG_KEY),
354             "21:38: " + getCheckMessage(MSG_KEY),
355             "26:38: " + getCheckMessage(MSG_KEY),
356             "28:38: " + getCheckMessage(MSG_KEY),
357             "37:36: " + getCheckMessage(MSG_KEY),
358             "39:36: " + getCheckMessage(MSG_KEY),
359             "46:38: " + getCheckMessage(MSG_KEY),
360             "48:38: " + getCheckMessage(MSG_KEY),
361             "50:38: " + getCheckMessage(MSG_KEY),
362             "52:47: " + getCheckMessage(MSG_KEY),
363             "61:32: " + getCheckMessage(MSG_KEY),
364         };
365         verifyWithInlineConfigParser(
366                 getPath("InputAvoidEscapedUnicodeCharacters4One.java"), expected);
367     }
368 
369     @Test
370     public void allowNonPrintableEscapesTwo() throws Exception {
371         final String[] expected = {
372             "17:38: " + getCheckMessage(MSG_KEY),
373             "19:38: " + getCheckMessage(MSG_KEY),
374             "21:38: " + getCheckMessage(MSG_KEY),
375             "23:38: " + getCheckMessage(MSG_KEY),
376             "28:34: " + getCheckMessage(MSG_KEY),
377             "30:46: " + getCheckMessage(MSG_KEY),
378             "35:38: " + getCheckMessage(MSG_KEY),
379             "42:38: " + getCheckMessage(MSG_KEY),
380             "45:46: " + getCheckMessage(MSG_KEY),
381             "47:55: " + getCheckMessage(MSG_KEY),
382             "49:46: " + getCheckMessage(MSG_KEY),
383             "51:55: " + getCheckMessage(MSG_KEY),
384             "53:46: " + getCheckMessage(MSG_KEY),
385             "55:55: " + getCheckMessage(MSG_KEY),
386             "57:46: " + getCheckMessage(MSG_KEY),
387             "59:55: " + getCheckMessage(MSG_KEY),
388             "61:46: " + getCheckMessage(MSG_KEY),
389             "63:55: " + getCheckMessage(MSG_KEY),
390             "65:48: " + getCheckMessage(MSG_KEY),
391             "67:57: " + getCheckMessage(MSG_KEY),
392         };
393         verifyWithInlineConfigParser(
394                 getPath("InputAvoidEscapedUnicodeCharacters4Two.java"), expected);
395     }
396 
397     @Test
398     public void testAllowByTailCommentWithEmoji() throws Exception {
399         final String[] expected = {
400             "15:24: " + getCheckMessage(MSG_KEY),
401             "18:24: " + getCheckMessage(MSG_KEY),
402             "23:30: " + getCheckMessage(MSG_KEY),
403             "33:18: " + getCheckMessage(MSG_KEY),
404             "36:18: " + getCheckMessage(MSG_KEY),
405             "38:18: " + getCheckMessage(MSG_KEY),
406             "41:18: " + getCheckMessage(MSG_KEY),
407         };
408         verifyWithInlineConfigParser(
409                 getPath("InputAvoidEscapedUnicodeCharacters5.java"), expected);
410     }
411 
412     @Test
413     public void testAvoidEscapedUnicodeCharactersTextBlocksAllowByComment() throws Exception {
414         final String[] expected = {
415             "18:30: " + getCheckMessage(MSG_KEY),
416             "20:30: " + getCheckMessage(MSG_KEY),
417             "22:30: " + getCheckMessage(MSG_KEY),
418             "25:39: " + getCheckMessage(MSG_KEY),
419             "30:33: " + getCheckMessage(MSG_KEY),
420             "33:33: " + getCheckMessage(MSG_KEY),
421             "36:33: " + getCheckMessage(MSG_KEY),
422             "41:42: " + getCheckMessage(MSG_KEY),
423         };
424         verifyWithInlineConfigParser(
425                 getPath(
426                 "InputAvoidEscapedUnicodeCharactersTextBlocksAllowByComment.java"),
427             expected);
428     }
429 
430     @Test
431     public void testAvoidEscapedUnicodeCharactersTextBlocks() throws Exception {
432         final String[] expected = {
433             "17:30: " + getCheckMessage(MSG_KEY),
434             "19:30: " + getCheckMessage(MSG_KEY),
435             "21:30: " + getCheckMessage(MSG_KEY),
436             "23:39: " + getCheckMessage(MSG_KEY),
437             "29:33: " + getCheckMessage(MSG_KEY),
438             "32:33: " + getCheckMessage(MSG_KEY),
439             "35:33: " + getCheckMessage(MSG_KEY),
440             "38:42: " + getCheckMessage(MSG_KEY),
441         };
442         verifyWithInlineConfigParser(
443                 getPath("InputAvoidEscapedUnicodeCharactersTextBlocks.java"),
444             expected);
445     }
446 
447     @Test
448     public void testAvoidEscapedUnicodeCharactersEscapedS() throws Exception {
449         final String[] expected = {
450             "17:21: " + getCheckMessage(MSG_KEY),
451             "19:22: " + getCheckMessage(MSG_KEY),
452             "31:39: " + getCheckMessage(MSG_KEY),
453             "35:39: " + getCheckMessage(MSG_KEY),
454             "39:39: " + getCheckMessage(MSG_KEY),
455             "43:22: " + getCheckMessage(MSG_KEY),
456         };
457         verifyWithInlineConfigParser(
458                 getPath("InputAvoidEscapedUnicodeCharactersEscapedS.java"),
459                 expected);
460     }
461 
462     @Test
463     public void testBlockCommentAtAbsoluteEndOfFile() throws Exception {
464         final String[] expected = CommonUtil.EMPTY_STRING_ARRAY;
465         verifyWithInlineConfigParser(
466             getPath("InputAvoidEscapedUnicodeCharactersCommentEnd.java"),
467             expected);
468     }
469 
470     @Test
471     public void testBeginTreeClearsPendingViolations() throws Exception {
472         final String file1 = getPath("InputAvoidEscapedUnicodeCharactersBeginTree1.java");
473         final String file2 = getPath("InputAvoidEscapedUnicodeCharactersBeginTree2.java");
474 
475         final List<String> expected1 = List.of(
476                 "15:17: " + getCheckMessage(MSG_KEY));
477         final List<String> expected2 = Arrays.asList(CommonUtil.EMPTY_STRING_ARRAY);
478 
479         verifyWithInlineConfigParser(file1, file2, expected1, expected2);
480     }
481 
482     @Test
483     public void testGetAcceptableTokens() {
484         final AvoidEscapedUnicodeCharactersCheck check = new AvoidEscapedUnicodeCharactersCheck();
485         final int[] actual = check.getAcceptableTokens();
486         final int[] expected = {
487             TokenTypes.STRING_LITERAL,
488             TokenTypes.CHAR_LITERAL,
489             TokenTypes.TEXT_BLOCK_CONTENT,
490             TokenTypes.SINGLE_LINE_COMMENT,
491             TokenTypes.BLOCK_COMMENT_BEGIN,
492         };
493         assertWithMessage("Acceptable tokens differ from expected")
494             .that(actual)
495             .isEqualTo(expected);
496     }
497 
498     @Test
499     public void testAllowEscapesForControlCharacterSetForAllCharacters() throws Exception {
500 
501         final int indexOfStartLineInInputFile = 16;
502         final String message = getCheckMessage(MSG_KEY);
503         final String[] expected = IntStream.rangeClosed(0, 0xFFFF)
504                 .parallel()
505                 .filter(val -> !isControlCharacter(val))
506                 .mapToObj(msg -> indexOfStartLineInInputFile + msg + ":54: " + message)
507                 .toArray(String[]::new);
508         verifyWithInlineConfigParser(
509                 getPath("InputAvoidEscapedUnicodeCharactersAllEscapedUnicodeCharacters.java"),
510                 expected);
511     }
512 
513     /**
514      * Method countMatches is used only inside isOnlyUnicodeValidChars method, and when
515      * pitest mutates 316:13 countMatches++ to countMatches-- it makes no difference for
516      * isOnlyUnicodeValidChars method as it applies countMatches to both cases in comparison.
517      * It is possible to kill mutation in countMatches method by changing code in
518      * isOnlyUnicodeValidChars, but it creates new uncoverable mutations and makes code harder
519      * to understand.
520      *
521      * @throws Exception when code tested throws some exception
522      */
523     @Test
524     public void testCountMatches() throws Exception {
525         final AvoidEscapedUnicodeCharactersCheck check = new AvoidEscapedUnicodeCharactersCheck();
526         final int actual = TestUtil.invokeMethod(check, "countMatches", Integer.class,
527                 Pattern.compile("\\\\u[a-fA-F\\d]{4}"), "\\u1234");
528         assertWithMessage("Unexpected matches count")
529             .that(actual)
530             .isEqualTo(1);
531     }
532 
533     /**
534      * Testing, that all elements in the constant NON_PRINTABLE_CHARS are sorted.
535      * This is very convenient for the sake of maintainability.
536      */
537     @Test
538     public void testNonPrintableCharsAreSorted() {
539         String expression = TestUtil.getInternalStaticState(
540                 AvoidEscapedUnicodeCharactersCheck.class,
541                 "NON_PRINTABLE_CHARS", Pattern.class).pattern();
542 
543         // Replacing expressions like "\\u000[bB]" with "\\u000B"
544         final String[] charExpressions = {"Aa", "Bb", "Cc", "Dd", "Ee", "Ff"};
545         for (String charExpression : charExpressions) {
546             final String regex = "\\[[" + charExpression + "]{2}]";
547             expression = expression.replaceAll(regex, charExpression.substring(0, 1));
548         }
549 
550         // Replacing duplications like "\\uF{3}9" with "\\uFFF9"
551         for (int i = 4; i > 1; i--) {
552             final String regex = "([A-F])\\{" + i + "}";
553             String replacement = "$1$1{" + (i - 1) + "}";
554             if (i == 2) {
555                 replacement = "$1$1";
556             }
557             expression = expression.replaceAll(regex, replacement);
558         }
559 
560         // Verifying character order
561         final List<String> expressionParts = Splitter.on("|").splitToList(expression);
562         final Pattern unicodeCharPattern = Pattern.compile("^\\\\\\\\u[\\dA-F]{4}$");
563         String lastChar = null;
564         for (int i = 0; i < expressionParts.size(); i++) {
565             final String currentChar = expressionParts.get(i);
566             final Matcher matcher = unicodeCharPattern.matcher(currentChar);
567             if (!matcher.matches()) {
568                 final String message = "Character '" + currentChar + "' (at position " + i
569                         + ") doesn't match the pattern";
570                 assertWithMessage(message)
571                         .that(matcher.matches())
572                         .isTrue();
573             }
574             if (lastChar != null) {
575                 final String message = "Character '" + lastChar + "' should be after '"
576                         + currentChar + "', position: " + i;
577                 assertWithMessage(message)
578                         .that(lastChar.compareTo(currentChar) < 0)
579                         .isTrue();
580             }
581             lastChar = currentChar;
582         }
583     }
584 
585     private static boolean isControlCharacter(final int character) {
586         return Arrays.binarySearch(C0_CONTROL_CHARACTER_INDICES, character) >= 0
587                 || Arrays.binarySearch(C1_CONTROL_CHARACTER_INDICES, character) >= 0
588                 || Arrays.binarySearch(OTHER_CONTROL_CHARACTER_INDICES, character) >= 0;
589     }
590 
591 }