001///////////////////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
003// Copyright (C) 2001-2024 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018///////////////////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.utils;
021
022import java.io.File;
023import java.io.IOException;
024import java.util.ArrayList;
025import java.util.BitSet;
026import java.util.List;
027import java.util.Locale;
028import java.util.regex.Pattern;
029import java.util.stream.Collectors;
030
031import com.puppycrawl.tools.checkstyle.AstTreeStringPrinter;
032import com.puppycrawl.tools.checkstyle.JavaParser;
033import com.puppycrawl.tools.checkstyle.api.CheckstyleException;
034import com.puppycrawl.tools.checkstyle.api.DetailAST;
035import com.puppycrawl.tools.checkstyle.api.TokenTypes;
036import com.puppycrawl.tools.checkstyle.xpath.AbstractNode;
037import com.puppycrawl.tools.checkstyle.xpath.ElementNode;
038import com.puppycrawl.tools.checkstyle.xpath.RootNode;
039import net.sf.saxon.Configuration;
040import net.sf.saxon.om.Item;
041import net.sf.saxon.om.NodeInfo;
042import net.sf.saxon.sxpath.XPathDynamicContext;
043import net.sf.saxon.sxpath.XPathEvaluator;
044import net.sf.saxon.sxpath.XPathExpression;
045import net.sf.saxon.trans.XPathException;
046
047/**
048 * Contains utility methods for xpath.
049 *
050 */
051public final class XpathUtil {
052
053    /**
054     * Token types which support text attribute.
055     * These token types were selected based on analysis that all others do not match required
056     * criteria - text attribute of the token must be useful and help to retrieve more precise
057     * results.
058     * There are three types of AST tokens:
059     * 1. Tokens for which the texts are equal to the name of the token. Or in other words,
060     * nodes for which the following expression is always true:
061     * <pre>
062     *     detailAst.getText().equals(TokenUtil.getTokenName(detailAst.getType()))
063     * </pre>
064     * For example:
065     * <pre>
066     *     //MODIFIERS[@text='MODIFIERS']
067     *     //OBJBLOCK[@text='OBJBLOCK']
068     * </pre>
069     * These tokens do not match required criteria because their texts do not carry any additional
070     * information, they do not affect the xpath requests and do not help to get more accurate
071     * results. The texts of these nodes are useless. No matter what code you analyze, these
072     * texts are always the same.
073     * In addition, they make xpath queries more complex, less readable and verbose.
074     * 2. Tokens for which the texts differ from token names, but texts are always constant.
075     * For example:
076     * <pre>
077     *     //LITERAL_VOID[@text='void']
078     *     //RCURLY[@text='}']
079     * </pre>
080     * These tokens are not used for the same reasons as were described in the previous part.
081     * 3. Tokens for which texts are not constant. The texts of these nodes are closely related
082     * to a concrete class, method, variable and so on.
083     * For example:
084     * <pre>
085     *     String greeting = "HelloWorld";
086     *     //STRING_LITERAL[@text='HelloWorld']
087     * </pre>
088     * <pre>
089     *     int year = 2017;
090     *     //NUM_INT[@text=2017]
091     * </pre>
092     * <pre>
093     *     int age = 23;
094     *     //NUM_INT[@text=23]
095     * </pre>
096     * As you can see same {@code NUM_INT} token type can have different texts, depending on
097     * context.
098     * <pre>
099     *     public class MyClass {}
100     *     //IDENT[@text='MyClass']
101     * </pre>
102     * Only these tokens support text attribute because they make our xpath queries more accurate.
103     * These token types are listed below.
104     */
105    private static final BitSet TOKEN_TYPES_WITH_TEXT_ATTRIBUTE = TokenUtil.asBitSet(
106            TokenTypes.IDENT, TokenTypes.STRING_LITERAL, TokenTypes.CHAR_LITERAL,
107            TokenTypes.NUM_LONG, TokenTypes.NUM_INT, TokenTypes.NUM_DOUBLE, TokenTypes.NUM_FLOAT,
108            TokenTypes.TEXT_BLOCK_CONTENT, TokenTypes.COMMENT_CONTENT
109        );
110
111    /**
112     * This regexp is used to convert new line to newline tag.
113     */
114    private static final Pattern NEWLINE_TO_TAG = Pattern.compile("\n");
115
116    /**
117     * This regexp is used to convert carriage return to carriage-return tag.
118     */
119    private static final Pattern CARRIAGE_RETURN_TO_TAG = Pattern.compile("\r");
120
121    /** Delimiter to separate xpath results. */
122    private static final String DELIMITER = "---------" + System.lineSeparator();
123
124    /** Stop instances being created. **/
125    private XpathUtil() {
126    }
127
128    /**
129     * Iterates siblings of the given node and creates new Xpath-nodes.
130     *
131     * @param root the root node
132     * @param parent the parent node
133     * @param firstChild the first DetailAST
134     * @return children list
135     */
136    public static List<AbstractNode> createChildren(AbstractNode root, AbstractNode parent,
137                                                    DetailAST firstChild) {
138        DetailAST currentChild = firstChild;
139        final int depth = parent.getDepth() + 1;
140        final List<AbstractNode> result = new ArrayList<>();
141        while (currentChild != null) {
142            final int index = result.size();
143            final ElementNode child = new ElementNode(root, parent, currentChild, depth, index);
144            result.add(child);
145            currentChild = currentChild.getNextSibling();
146        }
147        return result;
148    }
149
150    /**
151     * Checks, if specified node can have {@code @text} attribute.
152     *
153     * @param ast {@code DetailAst} element
154     * @return true if element supports {@code @text} attribute, false otherwise
155     */
156    public static boolean supportsTextAttribute(DetailAST ast) {
157        return TOKEN_TYPES_WITH_TEXT_ATTRIBUTE.get(ast.getType());
158    }
159
160    /**
161     * Returns content of the text attribute of the ast element.
162     *
163     * @param ast {@code DetailAst} element
164     * @return text attribute of the ast element
165     */
166    public static String getTextAttributeValue(DetailAST ast) {
167        String text = ast.getText();
168        if (ast.getType() == TokenTypes.STRING_LITERAL) {
169            text = text.substring(1, text.length() - 1);
170        }
171        text = CARRIAGE_RETURN_TO_TAG.matcher(text).replaceAll("\\\\r");
172        return NEWLINE_TO_TAG.matcher(text).replaceAll("\\\\n");
173    }
174
175    /**
176     * Returns xpath query results on file as string.
177     *
178     * @param xpath query to evaluate
179     * @param file file to run on
180     * @return all results as string separated by delimiter
181     * @throws CheckstyleException if some parsing error happens
182     * @throws IOException if an error occurs
183     */
184    public static String printXpathBranch(String xpath, File file) throws CheckstyleException,
185            IOException {
186        try {
187            final RootNode rootNode = new RootNode(JavaParser.parseFile(file,
188                JavaParser.Options.WITH_COMMENTS));
189            final List<NodeInfo> matchingItems = getXpathItems(xpath, rootNode);
190            return matchingItems.stream()
191                .map(item -> ((ElementNode) item).getUnderlyingNode())
192                .map(AstTreeStringPrinter::printBranch)
193                .collect(Collectors.joining(DELIMITER));
194        }
195        catch (XPathException ex) {
196            final String errMsg = String.format(Locale.ROOT,
197                "Error during evaluation for xpath: %s, file: %s", xpath, file.getCanonicalPath());
198            throw new CheckstyleException(errMsg, ex);
199        }
200    }
201
202    /**
203     * Returns list of nodes matching xpath expression given node context.
204     *
205     * @param xpath Xpath expression
206     * @param rootNode {@code NodeInfo} node context
207     * @return list of nodes matching xpath expression given node context
208     * @throws XPathException if Xpath cannot be parsed
209     */
210    public static List<NodeInfo> getXpathItems(String xpath, AbstractNode rootNode)
211            throws XPathException {
212        final XPathEvaluator xpathEvaluator = new XPathEvaluator(Configuration.newConfiguration());
213        final XPathExpression xpathExpression = xpathEvaluator.createExpression(xpath);
214        final XPathDynamicContext xpathDynamicContext = xpathExpression
215                .createDynamicContext(rootNode);
216        final List<Item> items = xpathExpression.evaluate(xpathDynamicContext);
217        return UnmodifiableCollectionUtil.unmodifiableList(items, NodeInfo.class);
218    }
219}