1 /////////////////////////////////////////////////////////////////////////////////////////////// 2 // checkstyle: Checks Java source code and other text files for adherence to a set of rules. 3 // Copyright (C) 2001-2024 the original author or authors. 4 // 5 // This library is free software; you can redistribute it and/or 6 // modify it under the terms of the GNU Lesser General Public 7 // License as published by the Free Software Foundation; either 8 // version 2.1 of the License, or (at your option) any later version. 9 // 10 // This library is distributed in the hope that it will be useful, 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 // Lesser General Public License for more details. 14 // 15 // You should have received a copy of the GNU Lesser General Public 16 // License along with this library; if not, write to the Free Software 17 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 /////////////////////////////////////////////////////////////////////////////////////////////// 19 20 package com.puppycrawl.tools.checkstyle.checks.javadoc; 21 22 import java.util.LinkedList; 23 import java.util.List; 24 25 /** 26 * <div> 27 * Helper class used to parse HTML tags or generic type identifiers 28 * from a single-line of text. Just the beginning of the HTML tag 29 * is located. No attempt is made to parse out the complete tag, 30 * particularly since some of the tag parameters could be located 31 * on the following line of text. The {@code hasNextTag} and 32 * {@code nextTag} methods are used to iterate through the HTML 33 * tags or generic type identifiers that were found on the line of text. 34 * </div> 35 * 36 * <p> 37 * This class isn't really specific to HTML tags. Currently, the only HTML 38 * tag that this class looks specifically for is the HTML comment tag. 39 * This class helps figure out if a tag exists and if it is well-formed. 40 * It does not know whether it is valid HTML. This class is also used for 41 * generics types which looks like opening HTML tags ex: {@code <T>, <E>, <V>, 42 * <MY_FOO_TYPE>}, etc. According to this class they are valid tags. 43 * </p> 44 * 45 */ 46 class TagParser { 47 48 /** HtmlTags found on the input line of text. */ 49 private final List<HtmlTag> tags = new LinkedList<>(); 50 51 /** 52 * Constructs a TagParser and finds the first tag if any. 53 * 54 * @param text the line of text to parse. 55 * @param lineNo the source line number. 56 */ 57 /* package */ TagParser(String[] text, int lineNo) { 58 parseTags(text, lineNo); 59 } 60 61 /** 62 * Returns the next available HtmlTag. 63 * 64 * @return a HtmlTag or {@code null} if none available. 65 * @throws IndexOutOfBoundsException if there are no HtmlTags 66 * left to return. 67 */ 68 public HtmlTag nextTag() { 69 return tags.remove(0); 70 } 71 72 /** 73 * Indicates if there are any more HtmlTag to retrieve. 74 * 75 * @return {@code true} if there are more tags. 76 */ 77 public boolean hasNextTag() { 78 return !tags.isEmpty(); 79 } 80 81 /** 82 * Performs lazy initialization on the internal tags List 83 * and adds the tag. 84 * 85 * @param tag the HtmlTag to add. 86 */ 87 private void add(HtmlTag tag) { 88 tags.add(tag); 89 } 90 91 /** 92 * Parses the text line for any HTML tags and adds them to the internal 93 * List of tags. 94 * 95 * @param text the source line to parse. 96 * @param lineNo the source line number. 97 */ 98 private void parseTags(String[] text, int lineNo) { 99 final int nLines = text.length; 100 Point position = new Point(0, 0); 101 while (position.getLineNo() < nLines) { 102 // if this is html comment then skip it 103 if (isCommentTag(text, position)) { 104 position = skipHtmlComment(text, position); 105 } 106 else if (isTag(text, position)) { 107 position = parseTag(text, lineNo, nLines, position); 108 } 109 else { 110 position = getNextPoint(text, position); 111 } 112 position = findChar(text, '<', position); 113 } 114 } 115 116 /** 117 * Parses the tag and return position after it. 118 * 119 * @param text the source line to parse. 120 * @param lineNo the source line number. 121 * @param nLines line length 122 * @param position start position for parsing 123 * @return position after tag 124 */ 125 private Point parseTag(String[] text, int lineNo, final int nLines, Point position) { 126 // find end of tag 127 final Point endTag = findChar(text, '>', position); 128 final boolean incompleteTag = endTag.getLineNo() >= nLines; 129 // get tag id (one word) 130 final String tagId = getTagId(text, position); 131 // is this closed tag 132 final boolean closedTag = 133 endTag.getLineNo() < nLines 134 && text[endTag.getLineNo()] 135 .charAt(endTag.getColumnNo() - 1) == '/'; 136 // add new tag 137 add(new HtmlTag(tagId, 138 position.getLineNo() + lineNo, 139 position.getColumnNo(), 140 closedTag, 141 incompleteTag, 142 text[position.getLineNo()])); 143 return endTag; 144 } 145 146 /** 147 * Checks if the given position is start one for HTML tag. 148 * 149 * @param javadocText text of javadoc comments. 150 * @param pos position to check. 151 * @return {@code true} some HTML tag starts from given position. 152 */ 153 private static boolean isTag(String[] javadocText, Point pos) { 154 final int column = pos.getColumnNo() + 1; 155 final String text = javadocText[pos.getLineNo()]; 156 157 // Character.isJavaIdentifier... may not be a valid HTML 158 // identifier but is valid for generics 159 return column >= text.length() 160 || Character.isJavaIdentifierStart(text.charAt(column)) 161 || text.charAt(column) == '/'; 162 } 163 164 /** 165 * Parse tag id. 166 * 167 * @param javadocText text of javadoc comments. 168 * @param tagStart start position of the tag 169 * @return id for given tag 170 */ 171 private static String getTagId(String[] javadocText, Point tagStart) { 172 String tagId = ""; 173 int column = tagStart.getColumnNo() + 1; 174 String text = javadocText[tagStart.getLineNo()]; 175 if (column < text.length()) { 176 if (text.charAt(column) == '/') { 177 column++; 178 } 179 text = text.substring(column); 180 int position = 0; 181 182 // Character.isJavaIdentifier... may not be a valid HTML 183 // identifier but is valid for generics 184 while (position < text.length() 185 && Character.isJavaIdentifierPart(text.charAt(position))) { 186 position++; 187 } 188 189 tagId = text.substring(0, position); 190 } 191 return tagId; 192 } 193 194 /** 195 * If this is a HTML-comments. 196 * 197 * @param text text of javadoc comments 198 * @param pos position to check 199 * @return {@code true} if HTML-comments 200 * starts form given position. 201 */ 202 private static boolean isCommentTag(String[] text, Point pos) { 203 return text[pos.getLineNo()].startsWith("<!--", pos.getColumnNo()); 204 } 205 206 /** 207 * Skips HTML comments. 208 * 209 * @param text text of javadoc comments. 210 * @param fromPoint start position of HTML-comments 211 * @return position after HTML-comments 212 */ 213 private static Point skipHtmlComment(String[] text, Point fromPoint) { 214 Point toPoint = fromPoint; 215 while (toPoint.getLineNo() < text.length && !text[toPoint.getLineNo()] 216 .substring(0, toPoint.getColumnNo() + 1).endsWith("-->")) { 217 toPoint = findChar(text, '>', getNextPoint(text, toPoint)); 218 } 219 return toPoint; 220 } 221 222 /** 223 * Finds next occurrence of given character. 224 * 225 * @param text text to search 226 * @param character character to search 227 * @param from position to start search 228 * @return position of next occurrence of given character 229 */ 230 private static Point findChar(String[] text, char character, Point from) { 231 Point curr = new Point(from.getLineNo(), from.getColumnNo()); 232 while (curr.getLineNo() < text.length 233 && text[curr.getLineNo()].charAt(curr.getColumnNo()) != character) { 234 curr = getNextPoint(text, curr); 235 } 236 237 return curr; 238 } 239 240 /** 241 * Increments column number to be examined, moves onto the next line when no 242 * more characters are available. 243 * 244 * @param text to search. 245 * @param from location to search from 246 * @return next point to be examined 247 */ 248 private static Point getNextPoint(String[] text, Point from) { 249 int line = from.getLineNo(); 250 int column = from.getColumnNo() + 1; 251 while (line < text.length && column >= text[line].length()) { 252 // go to the next line 253 line++; 254 column = 0; 255 } 256 return new Point(line, column); 257 } 258 259 /** 260 * Represents current position in the text. 261 */ 262 private static final class Point { 263 264 /** Line number. */ 265 private final int lineNo; 266 /** Column number.*/ 267 private final int columnNo; 268 269 /** 270 * Creates new {@code Point} instance. 271 * 272 * @param lineNo line number 273 * @param columnNo column number 274 */ 275 private Point(int lineNo, int columnNo) { 276 this.lineNo = lineNo; 277 this.columnNo = columnNo; 278 } 279 280 /** 281 * Getter for line number. 282 * 283 * @return line number of the position. 284 */ 285 public int getLineNo() { 286 return lineNo; 287 } 288 289 /** 290 * Getter for column number. 291 * 292 * @return column number of the position. 293 */ 294 public int getColumnNo() { 295 return columnNo; 296 } 297 298 } 299 300 }