1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package com.puppycrawl.tools.checkstyle.meta;
21
22 import java.util.ArrayDeque;
23 import java.util.Arrays;
24 import java.util.Collections;
25 import java.util.Deque;
26 import java.util.HashMap;
27 import java.util.HashSet;
28 import java.util.LinkedHashSet;
29 import java.util.Locale;
30 import java.util.Map;
31 import java.util.Optional;
32 import java.util.Set;
33 import java.util.regex.Matcher;
34 import java.util.regex.Pattern;
35 import java.util.stream.Collectors;
36
37 import javax.xml.parsers.ParserConfigurationException;
38 import javax.xml.transform.TransformerException;
39
40 import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
41 import com.puppycrawl.tools.checkstyle.api.DetailAST;
42 import com.puppycrawl.tools.checkstyle.api.DetailNode;
43 import com.puppycrawl.tools.checkstyle.api.JavadocTokenTypes;
44 import com.puppycrawl.tools.checkstyle.api.TokenTypes;
45 import com.puppycrawl.tools.checkstyle.checks.javadoc.AbstractJavadocCheck;
46 import com.puppycrawl.tools.checkstyle.utils.TokenUtil;
47
48
49
50
51 @FileStatefulCheck
52 public class JavadocMetadataScraper extends AbstractJavadocCheck {
53
54
55
56
57
/** Message key logged when the scraped module description turns out to be empty. */
public static final String MSG_DESC_MISSING = "javadocmetadatascraper.description.missing";

/**
 * Scraped module details keyed by fully qualified module name. Entries are added
 * only when XML output is disabled.
 */
private static final Map<String, ModuleDetails> MODULE_DETAILS_STORE = new HashMap<>();

/** Matches the leading text of a list item that describes a module property. */
private static final Pattern PROPERTY_TAG = Pattern.compile("\\s*Property\\s*");

/** Matches the child text that introduces the type of a property. */
private static final Pattern TYPE_TAG = Pattern.compile("^ Type is\\s.*");

/** Matches the child text that introduces the validation type of a property. */
private static final Pattern VALIDATION_TYPE_TAG =
    Pattern.compile("\\s.*Validation type is\\s.*");

/** Matches the child text that introduces the default value of a property. */
private static final Pattern DEFAULT_VALUE_TAG = Pattern.compile("^ Default value is:*.*");

/** Matches the leading text of the paragraph that opens the examples section. */
private static final Pattern EXAMPLES_TAG =
    Pattern.compile("\\s*To configure the (default )?check.*");

/** Matches the leading text of the paragraph that names the parent module. */
private static final Pattern PARENT_TAG = Pattern.compile("\\s*Parent is\\s*");

/** Matches the leading text of the paragraph that precedes the violation message keys. */
private static final Pattern VIOLATION_MESSAGES_TAG =
    Pattern.compile("\\s*Violation Message Keys:\\s*");

/**
 * Matches upper-case, underscore-separated words. Used both to extract token names
 * from default-value text and to recognise nodes whose text is not real content.
 */
private static final Pattern TOKEN_TEXT_PATTERN = Pattern.compile("([A-Z_]{2,})+");

/** Matches a hyphen followed by one whitespace character; removed from property descriptions. */
private static final Pattern DESC_CLEAN = Pattern.compile("-\\s");

/** Matches the platform file separator literally; used to split file paths. */
private static final Pattern FILE_SEPARATOR_PATTERN =
    Pattern.compile(Pattern.quote(System.getProperty("file.separator")));

/** Matches a double quote character; removed from text taken out of inline tags. */
private static final Pattern QUOTE_PATTERN = Pattern.compile("\"");

/** File extension stripped from file names to obtain the module class name. */
private static final String JAVA_FILE_EXTENSION = ".java";

/**
 * Default-value texts that are not stored: when a scraped default value equals one
 * of these, the property's default value is left unset.
 */
private static final Set<String> PROPERTIES_TO_NOT_WRITE = Set.of(
    "null",
    "the charset property of the parent <a href=https://checkstyle.org/"
        + "config.html#Checker>Checker</a> module");

/** Format string for the exception raised when a property has no type text. */
private static final String PROP_TYPE_MISSING = "Type for property '%s' is missing";

/** Format string for the exception raised when a property has no default value text. */
private static final String PROP_DEFAULT_VALUE_MISSING =
    "Default value for property '%s' is missing";
122
123
/** Details of the module currently being scraped; recreated for each top-level class Javadoc. */
private ModuleDetails moduleDetails;

/**
 * Set once the violation-messages paragraph has been seen; from then on list items
 * that are not property entries are read as violation message keys.
 */
private boolean scrapingViolationMessageList;

/**
 * Whether visited tokens are handed to the scraper. Switched on at the root of a
 * class Javadoc and switched off again at the {@code @since} tag.
 */
private boolean toScan;

/** Root node of the most recently visited class Javadoc. */
private DetailNode rootNode;

/**
 * Index, among the root's children, of the subtree holding the first property
 * list item; {@code -1} while no property has been found.
 */
private int propertySectionStartIdx;

/**
 * Index, among the root's children, of the subtree holding the first examples
 * paragraph; {@code -1} while none has been found.
 */
private int exampleSectionStartIdx;

/**
 * Index, among the root's children, of the subtree holding the parent-module
 * paragraph; {@code -1} while none has been found.
 */
private int parentSectionStartIdx;

/**
 * When {@code true} (the default) scraped details are written through
 * {@code XmlMetaWriter}; when {@code false} they are kept in the static store.
 */
private boolean writeXmlOutput = true;
164
165
166
167
168
169
/**
 * Setter to control whether scraped metadata is written as XML or kept in the
 * in-memory store.
 *
 * @param writeXmlOutput {@code true} to write XML output, {@code false} to
 *     collect the details in the static store instead
 */
public final void setWriteXmlOutput(boolean writeXmlOutput) {
    this.writeXmlOutput = writeXmlOutput;
}
173
/**
 * Returns the Javadoc token types this check visits: the Javadoc root, paragraphs,
 * list items and the {@code @since} literal.
 *
 * @return a fresh array of Javadoc token type ids
 */
@Override
public int[] getDefaultJavadocTokens() {
    return new int[] {
        JavadocTokenTypes.JAVADOC,
        JavadocTokenTypes.PARAGRAPH,
        JavadocTokenTypes.LI,
        JavadocTokenTypes.SINCE_LITERAL,
    };
}
183
/**
 * Returns the required Javadoc tokens, which are exactly the acceptable ones.
 * NOTE(review): {@code getAcceptableJavadocTokens()} is not defined in this class;
 * presumably the inherited implementation falls back to the default tokens - confirm.
 *
 * @return the array returned by {@code getAcceptableJavadocTokens()}
 */
@Override
public int[] getRequiredJavadocTokens() {
    return getAcceptableJavadocTokens();
}
188
189 @Override
190 public void beginJavadocTree(DetailNode rootAst) {
191 if (isTopLevelClassJavadoc()) {
192 moduleDetails = new ModuleDetails();
193 toScan = false;
194 scrapingViolationMessageList = false;
195 propertySectionStartIdx = -1;
196 exampleSectionStartIdx = -1;
197 parentSectionStartIdx = -1;
198
199 String moduleName = getModuleSimpleName();
200 final String checkModuleExtension = "Check";
201 if (moduleName.endsWith(checkModuleExtension)) {
202 moduleName = moduleName
203 .substring(0, moduleName.length() - checkModuleExtension.length());
204 }
205 moduleDetails.setName(moduleName);
206 moduleDetails.setFullQualifiedName(getPackageName(getFilePath()));
207 moduleDetails.setModuleType(getModuleType());
208 }
209 }
210
211 @Override
212 public void visitJavadocToken(DetailNode ast) {
213 if (toScan) {
214 scrapeContent(ast);
215 }
216
217 if (ast.getType() == JavadocTokenTypes.JAVADOC) {
218 final DetailAST parent = getParent(getBlockCommentAst());
219 if (parent.getType() == TokenTypes.CLASS_DEF) {
220 rootNode = ast;
221 toScan = true;
222 }
223 }
224 else if (ast.getType() == JavadocTokenTypes.SINCE_LITERAL) {
225 toScan = false;
226 }
227 }
228
229 @Override
230 public void finishJavadocTree(DetailNode rootAst) {
231 moduleDetails.setDescription(getDescriptionText());
232 if (isTopLevelClassJavadoc()) {
233 if (moduleDetails.getDescription().isEmpty()) {
234 final String fullQualifiedName = moduleDetails.getFullQualifiedName();
235 log(rootAst.getLineNumber(), MSG_DESC_MISSING,
236 fullQualifiedName.substring(fullQualifiedName.lastIndexOf('.') + 1));
237 }
238 else if (writeXmlOutput) {
239 try {
240 XmlMetaWriter.write(moduleDetails);
241 }
242 catch (TransformerException | ParserConfigurationException exc) {
243 throw new IllegalStateException(
244 "Failed to write metadata into XML file for module: "
245 + getModuleSimpleName(), exc);
246 }
247 }
248 if (!writeXmlOutput) {
249 MODULE_DETAILS_STORE.put(moduleDetails.getFullQualifiedName(), moduleDetails);
250 }
251
252 }
253 }
254
255
256
257
258
259
260
261 private void scrapeContent(DetailNode ast) {
262 if (ast.getType() == JavadocTokenTypes.PARAGRAPH) {
263 if (isParentText(ast)) {
264 parentSectionStartIdx = getParentIndexOf(ast);
265 moduleDetails.setParent(getParentText(ast));
266 }
267 else if (isViolationMessagesText(ast)) {
268 scrapingViolationMessageList = true;
269 }
270 else if (exampleSectionStartIdx == -1
271 && isExamplesText(ast)) {
272 exampleSectionStartIdx = getParentIndexOf(ast);
273 }
274 }
275 else if (ast.getType() == JavadocTokenTypes.LI) {
276 if (isPropertyList(ast)) {
277 if (propertySectionStartIdx == -1) {
278 propertySectionStartIdx = getParentIndexOf(ast);
279 }
280 moduleDetails.addToProperties(createProperties(ast));
281 }
282 else if (scrapingViolationMessageList) {
283 moduleDetails.addToViolationMessages(getViolationMessages(ast));
284 }
285 }
286 }
287
288
289
290
291
292
293
294 private static ModulePropertyDetails createProperties(DetailNode nodeLi) {
295 final ModulePropertyDetails modulePropertyDetails = new ModulePropertyDetails();
296
297 final Optional<DetailNode> propertyNameNode = getFirstChildOfType(nodeLi,
298 JavadocTokenTypes.JAVADOC_INLINE_TAG, 0);
299 if (propertyNameNode.isPresent()) {
300 final DetailNode propertyNameTag = propertyNameNode.orElseThrow();
301 final String propertyName = getTextFromTag(propertyNameTag);
302
303 final DetailNode propertyType = getFirstChildOfMatchingText(nodeLi, TYPE_TAG)
304 .orElseThrow(() -> {
305 return new MetadataGenerationException(String.format(
306 Locale.ROOT, PROP_TYPE_MISSING, propertyName)
307 );
308 });
309 final String propertyDesc = DESC_CLEAN.matcher(
310 constructSubTreeText(nodeLi, propertyNameTag.getIndex() + 1,
311 propertyType.getIndex() - 1))
312 .replaceAll(Matcher.quoteReplacement(""));
313
314 modulePropertyDetails.setDescription(propertyDesc.trim());
315 modulePropertyDetails.setName(propertyName);
316 modulePropertyDetails.setType(getTagTextFromProperty(nodeLi, propertyType));
317
318 final Optional<DetailNode> validationTypeNodeOpt = getFirstChildOfMatchingText(nodeLi,
319 VALIDATION_TYPE_TAG);
320 if (validationTypeNodeOpt.isPresent()) {
321 final DetailNode validationTypeNode = validationTypeNodeOpt.orElseThrow();
322 modulePropertyDetails.setValidationType(getTagTextFromProperty(nodeLi,
323 validationTypeNode));
324 }
325
326 final String defaultValue = getFirstChildOfMatchingText(nodeLi, DEFAULT_VALUE_TAG)
327 .map(defaultValueNode -> getPropertyDefaultText(nodeLi, defaultValueNode))
328 .orElseThrow(() -> {
329 return new MetadataGenerationException(String.format(
330 Locale.ROOT, PROP_DEFAULT_VALUE_MISSING, propertyName)
331 );
332 });
333 if (!PROPERTIES_TO_NOT_WRITE.contains(defaultValue)) {
334 modulePropertyDetails.setDefaultValue(defaultValue);
335 }
336 }
337 return modulePropertyDetails;
338 }
339
340
341
342
343
344
345
346
347 private static String getTagTextFromProperty(DetailNode nodeLi, DetailNode propertyMeta) {
348 final Optional<DetailNode> tagNodeOpt = getFirstChildOfType(nodeLi,
349 JavadocTokenTypes.JAVADOC_INLINE_TAG, propertyMeta.getIndex() + 1);
350 DetailNode tagNode = null;
351 if (tagNodeOpt.isPresent()) {
352 tagNode = tagNodeOpt.orElseThrow();
353 }
354 return getTextFromTag(tagNode);
355 }
356
357
358
359
360
361
362
363 private static String cleanDefaultTokensText(String initialText) {
364 final Set<String> tokens = new LinkedHashSet<>();
365 final Matcher matcher = TOKEN_TEXT_PATTERN.matcher(initialText);
366 while (matcher.find()) {
367 tokens.add(matcher.group(0));
368 }
369 return String.join(",", tokens);
370 }
371
372
373
374
375
376
377
378
379
380
381 public static String constructSubTreeText(DetailNode node, int childLeftLimit,
382 int childRightLimit) {
383 DetailNode detailNode = node;
384
385 final Deque<DetailNode> stack = new ArrayDeque<>();
386 stack.addFirst(detailNode);
387 final Set<DetailNode> visited = new HashSet<>();
388 final StringBuilder result = new StringBuilder(1024);
389 while (!stack.isEmpty()) {
390 detailNode = stack.removeFirst();
391
392 if (visited.add(detailNode) && isContentToWrite(detailNode)) {
393 String childText = detailNode.getText();
394
395 if (detailNode.getParent().getType() == JavadocTokenTypes.JAVADOC_INLINE_TAG) {
396 childText = adjustCodeInlineTagChildToHtml(detailNode);
397 }
398
399 result.insert(0, childText);
400 }
401
402 for (DetailNode child : detailNode.getChildren()) {
403 if (child.getParent().equals(node)
404 && (child.getIndex() < childLeftLimit
405 || child.getIndex() > childRightLimit)) {
406 continue;
407 }
408 if (!visited.contains(child)) {
409 stack.addFirst(child);
410 }
411 }
412 }
413 return result.toString().trim();
414 }
415
416
417
418
419
420
421
422 private static boolean isContentToWrite(DetailNode detailNode) {
423
424 return detailNode.getType() != JavadocTokenTypes.LEADING_ASTERISK
425 && (detailNode.getType() == JavadocTokenTypes.TEXT
426 || !TOKEN_TEXT_PATTERN.matcher(detailNode.getText()).matches());
427 }
428
429
430
431
432
433
434
435 private static String adjustCodeInlineTagChildToHtml(DetailNode codeChild) {
436
437 return switch (codeChild.getType()) {
438 case JavadocTokenTypes.JAVADOC_INLINE_TAG_END -> "</code>";
439 case JavadocTokenTypes.WS -> "";
440 case JavadocTokenTypes.CODE_LITERAL -> codeChild.getText().replace("@", "") + ">";
441 case JavadocTokenTypes.JAVADOC_INLINE_TAG_START -> "<";
442 default -> codeChild.getText();
443 };
444 }
445
446
447
448
449
450
451
452
453 private String getDescriptionText() {
454 final int descriptionEndIdx;
455 if (propertySectionStartIdx > -1) {
456 descriptionEndIdx = propertySectionStartIdx;
457 }
458 else if (exampleSectionStartIdx > -1) {
459 descriptionEndIdx = exampleSectionStartIdx;
460 }
461 else {
462 descriptionEndIdx = parentSectionStartIdx;
463 }
464 return constructSubTreeText(rootNode, 0, descriptionEndIdx - 1);
465 }
466
467
468
469
470
471
472
473
474 private static String getPropertyDefaultText(DetailNode nodeLi, DetailNode defaultValueNode) {
475 final Optional<DetailNode> propertyDefaultValueTag = getFirstChildOfType(nodeLi,
476 JavadocTokenTypes.JAVADOC_INLINE_TAG, defaultValueNode.getIndex() + 1);
477 final String result;
478 if (propertyDefaultValueTag.isPresent()) {
479 result = getTextFromTag(propertyDefaultValueTag.orElseThrow());
480 }
481 else {
482 final String tokenText = constructSubTreeText(nodeLi,
483 defaultValueNode.getIndex(), nodeLi.getChildren().length);
484 result = cleanDefaultTokensText(tokenText);
485 }
486 return result;
487 }
488
489
490
491
492
493
494
495 private static String getViolationMessages(DetailNode nodeLi) {
496 final Optional<DetailNode> resultNode = getFirstChildOfType(nodeLi,
497 JavadocTokenTypes.JAVADOC_INLINE_TAG, 0);
498 return resultNode.map(JavadocMetadataScraper::getTextFromTag).orElse("");
499 }
500
501
502
503
504
505
506
507 private static String getTextFromTag(DetailNode nodeTag) {
508 return Optional.ofNullable(nodeTag).map(JavadocMetadataScraper::getText).orElse("");
509 }
510
511
512
513
514
515
516
517
518
519
520 private static Optional<DetailNode> getFirstChildOfType(DetailNode node, int tokenType,
521 int offset) {
522 return Arrays.stream(node.getChildren())
523 .filter(child -> child.getIndex() >= offset && child.getType() == tokenType)
524 .findFirst();
525 }
526
527
528
529
530
531
532
533 private static String getText(DetailNode parentNode) {
534 return Arrays.stream(parentNode.getChildren())
535 .filter(child -> child.getType() == JavadocTokenTypes.TEXT)
536 .map(node -> QUOTE_PATTERN.matcher(node.getText().trim()).replaceAll(""))
537 .collect(Collectors.joining(" "));
538 }
539
540
541
542
543
544
545
546
547 private static Optional<DetailNode> getFirstChildOfMatchingText(DetailNode node,
548 Pattern pattern) {
549 return Arrays.stream(node.getChildren())
550 .filter(child -> pattern.matcher(child.getText()).matches())
551 .findFirst();
552 }
553
554
555
556
557
558
559
560 private static DetailAST getParent(DetailAST commentBlock) {
561 final DetailAST parentNode = commentBlock.getParent();
562 DetailAST result = parentNode;
563 if (result.getType() == TokenTypes.ANNOTATION) {
564 result = parentNode.getParent().getParent();
565 }
566 else if (result.getType() == TokenTypes.MODIFIERS) {
567 result = parentNode.getParent();
568 }
569 return result;
570 }
571
572
573
574
575
576
577
578
579 public static int getParentIndexOf(DetailNode node) {
580 DetailNode currNode = node;
581 while (currNode.getParent().getIndex() != -1) {
582 currNode = currNode.getParent();
583 }
584 return currNode.getIndex();
585 }
586
587
588
589
590
591
592
593 private static String getParentText(DetailNode nodeParagraph) {
594 return getFirstChildOfType(nodeParagraph, JavadocTokenTypes.JAVADOC_INLINE_TAG, 0)
595 .map(JavadocMetadataScraper::getTextFromTag)
596 .orElse(null);
597 }
598
599
600
601
602
603
604 private ModuleType getModuleType() {
605 final String simpleModuleName = getModuleSimpleName();
606 final ModuleType result;
607 if (simpleModuleName.endsWith("FileFilter")) {
608 result = ModuleType.FILEFILTER;
609 }
610 else if (simpleModuleName.endsWith("Filter")) {
611 result = ModuleType.FILTER;
612 }
613 else {
614 result = ModuleType.CHECK;
615 }
616 return result;
617 }
618
619
620
621
622
623
624 private String getModuleSimpleName() {
625 final String fullFileName = getFilePath();
626 final String[] pathTokens = FILE_SEPARATOR_PATTERN.split(fullFileName);
627 final String fileName = pathTokens[pathTokens.length - 1];
628 return fileName.substring(0, fileName.length() - JAVA_FILE_EXTENSION.length());
629 }
630
631
632
633
634
635
636
637 private static String getPackageName(String filePath) {
638 final Deque<String> result = new ArrayDeque<>();
639 final String[] filePathTokens = FILE_SEPARATOR_PATTERN.split(filePath);
640 for (int i = filePathTokens.length - 1; i >= 0; i--) {
641 if ("java".equals(filePathTokens[i]) || "resources".equals(filePathTokens[i])) {
642 break;
643 }
644 result.addFirst(filePathTokens[i]);
645 }
646 final String fileName = result.removeLast();
647 result.addLast(fileName.substring(0, fileName.length() - JAVA_FILE_EXTENSION.length()));
648 return String.join(".", result);
649 }
650
651
652
653
654
655
/**
 * Gives read-only access to the details collected while XML output was disabled.
 *
 * @return an unmodifiable view of the store, keyed by fully qualified module name
 */
public static Map<String, ModuleDetails> getModuleDetailsStore() {
    return Collections.unmodifiableMap(MODULE_DETAILS_STORE);
}
659
660
/** Removes every entry from the static module details store. */
public static void resetModuleDetailsStore() {
    MODULE_DETAILS_STORE.clear();
}
664
665
666
667
668
669
670
671 private boolean isTopLevelClassJavadoc() {
672 final DetailAST parent = getParent(getBlockCommentAst());
673 final Optional<DetailAST> className = TokenUtil
674 .findFirstTokenByPredicate(parent, child -> {
675 return parent.getType() == TokenTypes.CLASS_DEF
676 && child.getType() == TokenTypes.IDENT;
677 });
678 return className.isPresent()
679 && getModuleSimpleName().equals(className.orElseThrow().getText());
680 }
681
682
683
684
685
686
687
/**
 * Checks whether the first text child of the node opens the examples section.
 *
 * @param ast node to check
 * @return {@code true} if the first text child matches the examples pattern
 */
private static boolean isExamplesText(DetailNode ast) {
    return isChildNodeTextMatches(ast, EXAMPLES_TAG);
}
691
692
693
694
695
696
697
/**
 * Checks whether a list item describes a module property.
 *
 * @param nodeLi list item node to check
 * @return {@code true} if the first text child matches the property pattern
 */
private static boolean isPropertyList(DetailNode nodeLi) {
    return isChildNodeTextMatches(nodeLi, PROPERTY_TAG);
}
701
702
703
704
705
706
707
708
/**
 * Checks whether a paragraph introduces the list of violation message keys.
 *
 * @param nodeParagraph paragraph node to check
 * @return {@code true} if the first text child matches the violation messages pattern
 */
private static boolean isViolationMessagesText(DetailNode nodeParagraph) {
    return isChildNodeTextMatches(nodeParagraph, VIOLATION_MESSAGES_TAG);
}
712
713
714
715
716
717
718
719
/**
 * Checks whether a paragraph names the parent module.
 *
 * @param nodeParagraph paragraph node to check
 * @return {@code true} if the first text child matches the parent pattern
 */
public static boolean isParentText(DetailNode nodeParagraph) {
    return isChildNodeTextMatches(nodeParagraph, PARENT_TAG);
}
723
724
725
726
727
728
729
730
731 public static boolean isChildNodeTextMatches(DetailNode ast, Pattern pattern) {
732 return getFirstChildOfType(ast, JavadocTokenTypes.TEXT, 0)
733 .map(DetailNode::getText)
734 .map(pattern::matcher)
735 .map(Matcher::matches)
736 .orElse(Boolean.FALSE);
737 }
738 }