///////////////////////////////////////////////////////////////////////////////////////////////
// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
// Copyright (C) 2001-2024 the original author or authors.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
///////////////////////////////////////////////////////////////////////////////////////////////

package com.puppycrawl.tools.checkstyle.checks.javadoc;

import java.util.LinkedList;
import java.util.List;

/**
 * <p>
 * Helper class used to parse HTML tags or generic type identifiers
 * from lines of text. Just the beginning of the HTML tag
 * is located. No attempt is made to parse out the complete tag,
 * particularly since some of the tag parameters could be located
 * on the following line of text. The {@code hasNextTag} and
 * {@code nextTag} methods are used to iterate through the HTML
 * tags or generic type identifiers that were found in the text.
 * </p>
 *
 * <p>
 * This class isn't really specific to HTML tags. Currently, the only HTML
 * tag that this class looks specifically for is the HTML comment tag.
 * This class helps figure out if a tag exists and if it is well-formed.
 * It does not know whether it is valid HTML. This class is also used for
 * generic types, which look like opening HTML tags, e.g. {@code <T>, <E>, <V>,
 * <MY_FOO_TYPE>}, etc. According to this class they are valid tags.
 * </p>
 */
class TagParser {

    /** HtmlTags found in the input text, in order of appearance. */
    private final List<HtmlTag> tags = new LinkedList<>();

    /**
     * Constructs a TagParser and eagerly collects every tag found in the text.
     *
     * @param text the lines of text to parse.
     * @param lineNo the source line number of the first element of {@code text}.
     */
    /* package */ TagParser(String[] text, int lineNo) {
        parseTags(text, lineNo);
    }

    /**
     * Removes and returns the next available HtmlTag.
     *
     * @return the next HtmlTag.
     * @throws IndexOutOfBoundsException if there are no HtmlTags
     *         left to return; check {@link #hasNextTag()} first.
     */
    public HtmlTag nextTag() {
        return tags.remove(0);
    }

    /**
     * Indicates if there are any more HtmlTag to retrieve.
     *
     * @return {@code true} if there are more tags.
     */
    public boolean hasNextTag() {
        return !tags.isEmpty();
    }

    /**
     * Appends the tag to the internal list of tags.
     *
     * @param tag the HtmlTag to add.
     */
    private void add(HtmlTag tag) {
        tags.add(tag);
    }

    /**
     * Parses the text for any HTML tags and adds them to the internal
     * List of tags. HTML comments are skipped.
     *
     * @param text the source lines to parse.
     * @param lineNo the source line number of the first element of {@code text}.
     */
    private void parseTags(String[] text, int lineNo) {
        final int nLines = text.length;
        // Seek the first '<' before classifying anything: isTag() inspects the
        // character AFTER the current position and therefore must only be asked
        // about positions that actually hold a '<'.
        Point position = findChar(text, '<', new Point(0, 0));
        while (position.getLineNo() < nLines) {
            // if this is html comment then skip it
            if (isCommentTag(text, position)) {
                position = skipHtmlComment(text, position);
            }
            else if (isTag(text, position)) {
                position = parseTag(text, lineNo, nLines, position);
            }
            else {
                position = getNextPoint(text, position);
            }
            position = findChar(text, '<', position);
        }
    }

    /**
     * Parses the tag and returns the position of its closing {@code '>'}
     * (or the end-of-text position if the tag is never closed).
     *
     * @param text the source lines to parse.
     * @param lineNo the source line number of the first element of {@code text}.
     * @param nLines number of lines in {@code text}
     * @param position start position for parsing, pointing at {@code '<'}
     * @return position of the end of the tag
     */
    private Point parseTag(String[] text, int lineNo, final int nLines, Point position) {
        // find end of tag
        final Point endTag = findChar(text, '>', position);
        final boolean incompleteTag = endTag.getLineNo() >= nLines;
        // get tag id (one word)
        final String tagId = getTagId(text, position);
        // is this closed tag; a '>' in the first column has no preceding
        // character on its line, so it cannot be the end of "/>"
        final boolean closedTag =
            !incompleteTag
             && endTag.getColumnNo() > 0
             && text[endTag.getLineNo()]
             .charAt(endTag.getColumnNo() - 1) == '/';
        // add new tag
        add(new HtmlTag(tagId,
                        position.getLineNo() + lineNo,
                        position.getColumnNo(),
                        closedTag,
                        incompleteTag,
                        text[position.getLineNo()]));
        return endTag;
    }

    /**
     * Checks if the given position is the start of an HTML tag.
     * A {@code '<'} that is the last character of its line is also
     * reported as a tag start.
     *
     * @param javadocText text of javadoc comments.
     * @param pos position to check, pointing at {@code '<'}.
     * @return {@code true} if some HTML tag starts from given position.
     */
    private static boolean isTag(String[] javadocText, Point pos) {
        final int column = pos.getColumnNo() + 1;
        final String text = javadocText[pos.getLineNo()];

        // Character.isJavaIdentifier... may not be a valid HTML
        // identifier but is valid for generics
        return column >= text.length()
                || Character.isJavaIdentifierStart(text.charAt(column))
                || text.charAt(column) == '/';
    }

    /**
     * Parses the tag id: the run of Java identifier characters that follows
     * the opening {@code '<'} and an optional {@code '/'}.
     *
     * @param javadocText text of javadoc comments.
     * @param tagStart start position of the tag
     * @return id for given tag, or an empty string if there is none
     */
    private static String getTagId(String[] javadocText, Point tagStart) {
        String tagId = "";
        int column = tagStart.getColumnNo() + 1;
        String text = javadocText[tagStart.getLineNo()];
        if (column < text.length()) {
            if (text.charAt(column) == '/') {
                column++;
            }
            text = text.substring(column);
            int position = 0;

            // Character.isJavaIdentifier... may not be a valid HTML
            // identifier but is valid for generics
            while (position < text.length()
                    && Character.isJavaIdentifierPart(text.charAt(position))) {
                position++;
            }

            tagId = text.substring(0, position);
        }
        return tagId;
    }

    /**
     * Checks whether an HTML comment starts at the given position.
     *
     * @param text text of javadoc comments
     * @param pos position to check
     * @return {@code true} if an HTML comment
     *         starts from given position.
     */
    private static boolean isCommentTag(String[] text, Point pos) {
        return text[pos.getLineNo()].startsWith("<!--", pos.getColumnNo());
    }

    /**
     * Skips an HTML comment.
     *
     * @param text text of javadoc comments.
     * @param fromPoint start position of the HTML comment
     * @return position of the {@code '>'} that terminates the comment, or the
     *         end-of-text position if the comment is never terminated
     */
    private static Point skipHtmlComment(String[] text, Point fromPoint) {
        Point toPoint = fromPoint;
        // hop from '>' to '>' until one of them is the tail of "-->"
        while (toPoint.getLineNo() < text.length && !text[toPoint.getLineNo()]
                .substring(0, toPoint.getColumnNo() + 1).endsWith("-->")) {
            toPoint = findChar(text, '>', getNextPoint(text, toPoint));
        }
        return toPoint;
    }

    /**
     * Finds next occurrence of given character, starting at (and including)
     * the given position.
     *
     * @param text text to search
     * @param character character to search
     * @param from position to start search
     * @return position of next occurrence of given character; its line number
     *         equals {@code text.length} if the character was not found
     */
    private static Point findChar(String[] text, char character, Point from) {
        Point curr = new Point(from.getLineNo(), from.getColumnNo());
        while (curr.getLineNo() < text.length && !isCharAt(text, curr, character)) {
            curr = getNextPoint(text, curr);
        }

        return curr;
    }

    /**
     * Checks whether the given character is located at the given position.
     * A column beyond the end of the line (e.g. any column of an empty line)
     * never matches instead of raising an exception.
     *
     * @param text text to inspect
     * @param point position to inspect; its line number must be within {@code text}
     * @param character expected character
     * @return {@code true} if {@code character} is at {@code point}
     */
    private static boolean isCharAt(String[] text, Point point, char character) {
        final String line = text[point.getLineNo()];
        return point.getColumnNo() < line.length()
                && line.charAt(point.getColumnNo()) == character;
    }

    /**
     * Increments column number to be examined, moves onto the next line when no
     * more characters are available. Empty lines are skipped.
     *
     * @param text to search.
     * @param from location to search from
     * @return next point to be examined
     */
    private static Point getNextPoint(String[] text, Point from) {
        int line = from.getLineNo();
        int column = from.getColumnNo() + 1;
        while (line < text.length && column >= text[line].length()) {
            // go to the next line
            line++;
            column = 0;
        }
        return new Point(line, column);
    }

    /**
     * Represents current position in the text.
     */
    private static final class Point {

        /** Line number. */
        private final int lineNo;
        /** Column number. */
        private final int columnNo;

        /**
         * Creates new {@code Point} instance.
         *
         * @param lineNo line number
         * @param columnNo column number
         */
        private Point(int lineNo, int columnNo) {
            this.lineNo = lineNo;
            this.columnNo = columnNo;
        }

        /**
         * Getter for line number.
         *
         * @return line number of the position.
         */
        public int getLineNo() {
            return lineNo;
        }

        /**
         * Getter for column number.
         *
         * @return column number of the position.
         */
        public int getColumnNo() {
            return columnNo;
        }

    }

}