View Javadoc
1   ///////////////////////////////////////////////////////////////////////////////////////////////
2   // checkstyle: Checks Java source code and other text files for adherence to a set of rules.
3   // Copyright (C) 2001-2024 the original author or authors.
4   //
5   // This library is free software; you can redistribute it and/or
6   // modify it under the terms of the GNU Lesser General Public
7   // License as published by the Free Software Foundation; either
8   // version 2.1 of the License, or (at your option) any later version.
9   //
10  // This library is distributed in the hope that it will be useful,
11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  // Lesser General Public License for more details.
14  //
15  // You should have received a copy of the GNU Lesser General Public
16  // License along with this library; if not, write to the Free Software
17  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  ///////////////////////////////////////////////////////////////////////////////////////////////
19  
20  package com.puppycrawl.tools.checkstyle.checks.javadoc;
21  
22  import java.util.LinkedList;
23  import java.util.List;
24  
25  /**
26   * <div>
27   * Helper class used to parse HTML tags or generic type identifiers
28   * from a single-line of text. Just the beginning of the HTML tag
29   * is located.  No attempt is made to parse out the complete tag,
30   * particularly since some of the tag parameters could be located
31   * on the following line of text.  The {@code hasNextTag} and
32   * {@code nextTag} methods are used to iterate through the HTML
33   * tags or generic type identifiers that were found on the line of text.
34   * </div>
35   *
36   * <p>
37   * This class isn't really specific to HTML tags. Currently, the only HTML
38   * tag that this class looks specifically for is the HTML comment tag.
39   * This class helps figure out if a tag exists and if it is well-formed.
40   * It does not know whether it is valid HTML.  This class is also used for
41   * generics types which looks like opening HTML tags ex: {@code <T>, <E>, <V>,
42   * <MY_FOO_TYPE>}, etc. According to this class they are valid tags.
43   * </p>
44   *
45   */
46  class TagParser {
47  
48      /** HtmlTags found on the input line of text. */
49      private final List<HtmlTag> tags = new LinkedList<>();
50  
51      /**
52       * Constructs a TagParser and finds the first tag if any.
53       *
54       * @param text the line of text to parse.
55       * @param lineNo the source line number.
56       */
57      /* package */ TagParser(String[] text, int lineNo) {
58          parseTags(text, lineNo);
59      }
60  
61      /**
62       * Returns the next available HtmlTag.
63       *
64       * @return a HtmlTag or {@code null} if none available.
65       * @throws IndexOutOfBoundsException if there are no HtmlTags
66       *         left to return.
67       */
68      public HtmlTag nextTag() {
69          return tags.remove(0);
70      }
71  
72      /**
73       * Indicates if there are any more HtmlTag to retrieve.
74       *
75       * @return {@code true} if there are more tags.
76       */
77      public boolean hasNextTag() {
78          return !tags.isEmpty();
79      }
80  
81      /**
82       * Performs lazy initialization on the internal tags List
83       * and adds the tag.
84       *
85       * @param tag the HtmlTag to add.
86       */
87      private void add(HtmlTag tag) {
88          tags.add(tag);
89      }
90  
91      /**
92       * Parses the text line for any HTML tags and adds them to the internal
93       * List of tags.
94       *
95       * @param text the source line to parse.
96       * @param lineNo the source line number.
97       */
98      private void parseTags(String[] text, int lineNo) {
99          final int nLines = text.length;
100         Point position = new Point(0, 0);
101         while (position.getLineNo() < nLines) {
102             // if this is html comment then skip it
103             if (isCommentTag(text, position)) {
104                 position = skipHtmlComment(text, position);
105             }
106             else if (isTag(text, position)) {
107                 position = parseTag(text, lineNo, nLines, position);
108             }
109             else {
110                 position = getNextPoint(text, position);
111             }
112             position = findChar(text, '<', position);
113         }
114     }
115 
116     /**
117      * Parses the tag and return position after it.
118      *
119      * @param text the source line to parse.
120      * @param lineNo the source line number.
121      * @param nLines line length
122      * @param position start position for parsing
123      * @return position after tag
124      */
125     private Point parseTag(String[] text, int lineNo, final int nLines, Point position) {
126         // find end of tag
127         final Point endTag = findChar(text, '>', position);
128         final boolean incompleteTag = endTag.getLineNo() >= nLines;
129         // get tag id (one word)
130         final String tagId = getTagId(text, position);
131         // is this closed tag
132         final boolean closedTag =
133                 endTag.getLineNo() < nLines
134                  && text[endTag.getLineNo()]
135                  .charAt(endTag.getColumnNo() - 1) == '/';
136         // add new tag
137         add(new HtmlTag(tagId,
138                         position.getLineNo() + lineNo,
139                         position.getColumnNo(),
140                         closedTag,
141                         incompleteTag,
142                         text[position.getLineNo()]));
143         return endTag;
144     }
145 
146     /**
147      * Checks if the given position is start one for HTML tag.
148      *
149      * @param javadocText text of javadoc comments.
150      * @param pos position to check.
151      * @return {@code true} some HTML tag starts from given position.
152      */
153     private static boolean isTag(String[] javadocText, Point pos) {
154         final int column = pos.getColumnNo() + 1;
155         final String text = javadocText[pos.getLineNo()];
156 
157         // Character.isJavaIdentifier... may not be a valid HTML
158         // identifier but is valid for generics
159         return column >= text.length()
160                 || Character.isJavaIdentifierStart(text.charAt(column))
161                     || text.charAt(column) == '/';
162     }
163 
164     /**
165      * Parse tag id.
166      *
167      * @param javadocText text of javadoc comments.
168      * @param tagStart start position of the tag
169      * @return id for given tag
170      */
171     private static String getTagId(String[] javadocText, Point tagStart) {
172         String tagId = "";
173         int column = tagStart.getColumnNo() + 1;
174         String text = javadocText[tagStart.getLineNo()];
175         if (column < text.length()) {
176             if (text.charAt(column) == '/') {
177                 column++;
178             }
179             text = text.substring(column);
180             int position = 0;
181 
182             // Character.isJavaIdentifier... may not be a valid HTML
183             // identifier but is valid for generics
184             while (position < text.length()
185                     && Character.isJavaIdentifierPart(text.charAt(position))) {
186                 position++;
187             }
188 
189             tagId = text.substring(0, position);
190         }
191         return tagId;
192     }
193 
194     /**
195      * If this is a HTML-comments.
196      *
197      * @param text text of javadoc comments
198      * @param pos position to check
199      * @return {@code true} if HTML-comments
200      *         starts form given position.
201      */
202     private static boolean isCommentTag(String[] text, Point pos) {
203         return text[pos.getLineNo()].startsWith("<!--", pos.getColumnNo());
204     }
205 
206     /**
207      * Skips HTML comments.
208      *
209      * @param text text of javadoc comments.
210      * @param fromPoint start position of HTML-comments
211      * @return position after HTML-comments
212      */
213     private static Point skipHtmlComment(String[] text, Point fromPoint) {
214         Point toPoint = fromPoint;
215         while (toPoint.getLineNo() < text.length && !text[toPoint.getLineNo()]
216                 .substring(0, toPoint.getColumnNo() + 1).endsWith("-->")) {
217             toPoint = findChar(text, '>', getNextPoint(text, toPoint));
218         }
219         return toPoint;
220     }
221 
222     /**
223      * Finds next occurrence of given character.
224      *
225      * @param text text to search
226      * @param character character to search
227      * @param from position to start search
228      * @return position of next occurrence of given character
229      */
230     private static Point findChar(String[] text, char character, Point from) {
231         Point curr = new Point(from.getLineNo(), from.getColumnNo());
232         while (curr.getLineNo() < text.length
233                && text[curr.getLineNo()].charAt(curr.getColumnNo()) != character) {
234             curr = getNextPoint(text, curr);
235         }
236 
237         return curr;
238     }
239 
240     /**
241      * Increments column number to be examined, moves onto the next line when no
242      * more characters are available.
243      *
244      * @param text to search.
245      * @param from location to search from
246      * @return next point to be examined
247      */
248     private static Point getNextPoint(String[] text, Point from) {
249         int line = from.getLineNo();
250         int column = from.getColumnNo() + 1;
251         while (line < text.length && column >= text[line].length()) {
252             // go to the next line
253             line++;
254             column = 0;
255         }
256         return new Point(line, column);
257     }
258 
259     /**
260      * Represents current position in the text.
261      */
262     private static final class Point {
263 
264         /** Line number. */
265         private final int lineNo;
266         /** Column number.*/
267         private final int columnNo;
268 
269         /**
270          * Creates new {@code Point} instance.
271          *
272          * @param lineNo line number
273          * @param columnNo column number
274          */
275         private Point(int lineNo, int columnNo) {
276             this.lineNo = lineNo;
277             this.columnNo = columnNo;
278         }
279 
280         /**
281          * Getter for line number.
282          *
283          * @return line number of the position.
284          */
285         public int getLineNo() {
286             return lineNo;
287         }
288 
289         /**
290          * Getter for column number.
291          *
292          * @return column number of the position.
293          */
294         public int getColumnNo() {
295             return columnNo;
296         }
297 
298     }
299 
300 }