001///////////////////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
003// Copyright (C) 2001-2024 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018///////////////////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.javadoc;
021
022import java.util.LinkedList;
023import java.util.List;
024
025/**
026 * <p>
027 * Helper class used to parse HTML tags or generic type identifiers
028 * from a single-line of text. Just the beginning of the HTML tag
029 * is located.  No attempt is made to parse out the complete tag,
030 * particularly since some of the tag parameters could be located
031 * on the following line of text.  The {@code hasNextTag} and
032 * {@code nextTag} methods are used to iterate through the HTML
033 * tags or generic type identifiers that were found on the line of text.
034 * </p>
035 *
036 * <p>
037 * This class isn't really specific to HTML tags. Currently, the only HTML
038 * tag that this class looks specifically for is the HTML comment tag.
039 * This class helps figure out if a tag exists and if it is well-formed.
040 * It does not know whether it is valid HTML.  This class is also used for
041 * generics types which looks like opening HTML tags ex: {@code <T>, <E>, <V>,
042 * <MY_FOO_TYPE>}, etc. According to this class they are valid tags.
043 * </p>
044 *
045 */
046class TagParser {
047
048    /** HtmlTags found on the input line of text. */
049    private final List<HtmlTag> tags = new LinkedList<>();
050
051    /**
052     * Constructs a TagParser and finds the first tag if any.
053     *
054     * @param text the line of text to parse.
055     * @param lineNo the source line number.
056     */
057    /* package */ TagParser(String[] text, int lineNo) {
058        parseTags(text, lineNo);
059    }
060
061    /**
062     * Returns the next available HtmlTag.
063     *
064     * @return a HtmlTag or {@code null} if none available.
065     * @throws IndexOutOfBoundsException if there are no HtmlTags
066     *         left to return.
067     */
068    public HtmlTag nextTag() {
069        return tags.remove(0);
070    }
071
072    /**
073     * Indicates if there are any more HtmlTag to retrieve.
074     *
075     * @return {@code true} if there are more tags.
076     */
077    public boolean hasNextTag() {
078        return !tags.isEmpty();
079    }
080
081    /**
082     * Performs lazy initialization on the internal tags List
083     * and adds the tag.
084     *
085     * @param tag the HtmlTag to add.
086     */
087    private void add(HtmlTag tag) {
088        tags.add(tag);
089    }
090
091    /**
092     * Parses the text line for any HTML tags and adds them to the internal
093     * List of tags.
094     *
095     * @param text the source line to parse.
096     * @param lineNo the source line number.
097     */
098    private void parseTags(String[] text, int lineNo) {
099        final int nLines = text.length;
100        Point position = new Point(0, 0);
101        while (position.getLineNo() < nLines) {
102            // if this is html comment then skip it
103            if (isCommentTag(text, position)) {
104                position = skipHtmlComment(text, position);
105            }
106            else if (isTag(text, position)) {
107                position = parseTag(text, lineNo, nLines, position);
108            }
109            else {
110                position = getNextPoint(text, position);
111            }
112            position = findChar(text, '<', position);
113        }
114    }
115
116    /**
117     * Parses the tag and return position after it.
118     *
119     * @param text the source line to parse.
120     * @param lineNo the source line number.
121     * @param nLines line length
122     * @param position start position for parsing
123     * @return position after tag
124     */
125    private Point parseTag(String[] text, int lineNo, final int nLines, Point position) {
126        // find end of tag
127        final Point endTag = findChar(text, '>', position);
128        final boolean incompleteTag = endTag.getLineNo() >= nLines;
129        // get tag id (one word)
130        final String tagId = getTagId(text, position);
131        // is this closed tag
132        final boolean closedTag =
133                endTag.getLineNo() < nLines
134                 && text[endTag.getLineNo()]
135                 .charAt(endTag.getColumnNo() - 1) == '/';
136        // add new tag
137        add(new HtmlTag(tagId,
138                        position.getLineNo() + lineNo,
139                        position.getColumnNo(),
140                        closedTag,
141                        incompleteTag,
142                        text[position.getLineNo()]));
143        return endTag;
144    }
145
146    /**
147     * Checks if the given position is start one for HTML tag.
148     *
149     * @param javadocText text of javadoc comments.
150     * @param pos position to check.
151     * @return {@code true} some HTML tag starts from given position.
152     */
153    private static boolean isTag(String[] javadocText, Point pos) {
154        final int column = pos.getColumnNo() + 1;
155        final String text = javadocText[pos.getLineNo()];
156
157        // Character.isJavaIdentifier... may not be a valid HTML
158        // identifier but is valid for generics
159        return column >= text.length()
160                || Character.isJavaIdentifierStart(text.charAt(column))
161                    || text.charAt(column) == '/';
162    }
163
164    /**
165     * Parse tag id.
166     *
167     * @param javadocText text of javadoc comments.
168     * @param tagStart start position of the tag
169     * @return id for given tag
170     */
171    private static String getTagId(String[] javadocText, Point tagStart) {
172        String tagId = "";
173        int column = tagStart.getColumnNo() + 1;
174        String text = javadocText[tagStart.getLineNo()];
175        if (column < text.length()) {
176            if (text.charAt(column) == '/') {
177                column++;
178            }
179            text = text.substring(column);
180            int position = 0;
181
182            // Character.isJavaIdentifier... may not be a valid HTML
183            // identifier but is valid for generics
184            while (position < text.length()
185                    && Character.isJavaIdentifierPart(text.charAt(position))) {
186                position++;
187            }
188
189            tagId = text.substring(0, position);
190        }
191        return tagId;
192    }
193
194    /**
195     * If this is a HTML-comments.
196     *
197     * @param text text of javadoc comments
198     * @param pos position to check
199     * @return {@code true} if HTML-comments
200     *         starts form given position.
201     */
202    private static boolean isCommentTag(String[] text, Point pos) {
203        return text[pos.getLineNo()].startsWith("<!--", pos.getColumnNo());
204    }
205
206    /**
207     * Skips HTML comments.
208     *
209     * @param text text of javadoc comments.
210     * @param fromPoint start position of HTML-comments
211     * @return position after HTML-comments
212     */
213    private static Point skipHtmlComment(String[] text, Point fromPoint) {
214        Point toPoint = fromPoint;
215        while (toPoint.getLineNo() < text.length && !text[toPoint.getLineNo()]
216                .substring(0, toPoint.getColumnNo() + 1).endsWith("-->")) {
217            toPoint = findChar(text, '>', getNextPoint(text, toPoint));
218        }
219        return toPoint;
220    }
221
222    /**
223     * Finds next occurrence of given character.
224     *
225     * @param text text to search
226     * @param character character to search
227     * @param from position to start search
228     * @return position of next occurrence of given character
229     */
230    private static Point findChar(String[] text, char character, Point from) {
231        Point curr = new Point(from.getLineNo(), from.getColumnNo());
232        while (curr.getLineNo() < text.length
233               && text[curr.getLineNo()].charAt(curr.getColumnNo()) != character) {
234            curr = getNextPoint(text, curr);
235        }
236
237        return curr;
238    }
239
240    /**
241     * Increments column number to be examined, moves onto the next line when no
242     * more characters are available.
243     *
244     * @param text to search.
245     * @param from location to search from
246     * @return next point to be examined
247     */
248    private static Point getNextPoint(String[] text, Point from) {
249        int line = from.getLineNo();
250        int column = from.getColumnNo() + 1;
251        while (line < text.length && column >= text[line].length()) {
252            // go to the next line
253            line++;
254            column = 0;
255        }
256        return new Point(line, column);
257    }
258
259    /**
260     * Represents current position in the text.
261     */
262    private static final class Point {
263
264        /** Line number. */
265        private final int lineNo;
266        /** Column number.*/
267        private final int columnNo;
268
269        /**
270         * Creates new {@code Point} instance.
271         *
272         * @param lineNo line number
273         * @param columnNo column number
274         */
275        private Point(int lineNo, int columnNo) {
276            this.lineNo = lineNo;
277            this.columnNo = columnNo;
278        }
279
280        /**
281         * Getter for line number.
282         *
283         * @return line number of the position.
284         */
285        public int getLineNo() {
286            return lineNo;
287        }
288
289        /**
290         * Getter for column number.
291         *
292         * @return column number of the position.
293         */
294        public int getColumnNo() {
295            return columnNo;
296        }
297
298    }
299
300}