001////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code for adherence to a set of rules.
003// Copyright (C) 2001-2022 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.header;
021
022import java.io.File;
023import java.util.ArrayList;
024import java.util.Arrays;
025import java.util.List;
026import java.util.regex.Pattern;
027import java.util.regex.PatternSyntaxException;
028
029import com.puppycrawl.tools.checkstyle.StatelessCheck;
030import com.puppycrawl.tools.checkstyle.api.FileText;
031import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
032
033/**
034 * <p>
035 * Checks the header of a source file against a header that contains a
036 * <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/Pattern.html">
037 * pattern</a> for each line of the source header.
038 * </p>
039 * <p>
040 * Rationale: In some projects <a href="https://checkstyle.org/config_header.html#Header">
041 * checking against a fixed header</a> is not sufficient, e.g. the header might
042 * require a copyright line where the year information is not static.
043 * </p>
044 * <p>
045 * For example, consider the following header:
046 * </p>
047 * <pre>
048 * line  1: ^/{71}$
049 * line  2: ^// checkstyle:$
050 * line  3: ^// Checks Java source code for adherence to a set of rules\.$
051 * line  4: ^// Copyright \(C\) \d\d\d\d  Oliver Burn$
052 * line  5: ^// Last modification by \$Author.*\$$
053 * line  6: ^/{71}$
054 * line  7:
055 * line  8: ^package
056 * line  9:
057 * line 10: ^import
058 * line 11:
059 * line 12: ^/\*\*
060 * line 13: ^ \*([^/]|$)
061 * line 14: ^ \*&#47;
062 * </pre>
063 * <p>
064 * Lines 1 and 6 demonstrate a more compact notation for 71 '/' characters.
065 * Line 4 enforces that the copyright notice includes a four digit year.
 * Line 5 is an example of how to enforce revision control keywords in a file header.
 * Lines 12-14 are a template for javadoc (line 13 is so complicated to avoid a
 * conflict with the end of the javadoc comment). Lines 7, 9 and 11 will be treated
 * as '^$' and will forcefully expect the line to be empty.
070 * </p>
071 * <p>
072 * Different programming languages have different comment syntax rules,
073 * but all of them start a comment with a non-word character.
074 * Hence you can often use the non-word character class to abstract away
075 * the concrete comment syntax and allow checking the header for different
076 * languages with a single header definition. For example, consider the following
077 * header specification (note that this is not the full Apache license header):
078 * </p>
079 * <pre>
080 * line 1: ^#!
081 * line 2: ^&lt;\?xml.*&gt;$
082 * line 3: ^\W*$
083 * line 4: ^\W*Copyright 2006 The Apache Software Foundation or its licensors, as applicable\.$
084 * line 5: ^\W*Licensed under the Apache License, Version 2\.0 \(the "License"\);$
085 * line 6: ^\W*$
086 * </pre>
087 * <p>
088 * Lines 1 and 2 leave room for technical header lines, e.g. the "#!/bin/sh"
089 * line in Unix shell scripts, or the XML file header of XML files.
 * Set the multiLines property to "1, 2" so these lines can be ignored for
 * file types where they do not apply. Lines 3 through 6 define the actual header content.
092 * Note how lines 2, 4 and 5 use escapes for characters that have special regexp semantics.
093 * </p>
094 * <p>
 * In default configuration, if header is not specified, the default value
 * of header is set to null and the check does not raise any violations.
097 * </p>
098 * <ul>
099 * <li>
100 * Property {@code headerFile} - Specify the name of the file containing the required header.
101 * Type is {@code java.net.URI}.
102 * Default value is {@code null}.
103 * </li>
104 * <li>
105 * Property {@code charset} - Specify the character encoding to use when reading the headerFile.
106 * Type is {@code java.lang.String}.
107 * Default value is {@code the charset property of the parent
108 * <a href="https://checkstyle.org/config.html#Checker">Checker</a> module}.
109 * </li>
110 * <li>
111 * Property {@code header} - Define the required header specified inline.
112 * Individual header lines must be separated by the string {@code "\n"}
113 * (even on platforms with a different line separator).
114 * For header lines containing {@code "\n\n"} checkstyle will
115 * forcefully expect an empty line to exist. See examples below.
116 * Regular expressions must not span multiple lines.
117 * Type is {@code java.lang.String}.
118 * Default value is {@code null}.
119 * </li>
120 * <li>
121 * Property {@code multiLines} - Specify the line numbers to repeat (zero or more times).
122 * Type is {@code int[]}.
123 * Default value is {@code ""}.
124 * </li>
125 * <li>
126 * Property {@code fileExtensions} - Specify the file type extension of files to process.
127 * Type is {@code java.lang.String[]}.
128 * Default value is {@code ""}.
129 * </li>
130 * </ul>
131 * <p>
132 * To configure the check such that no violations arise.
133 * Default values of properties are used.
134 * </p>
135 * <pre>
136 * &lt;module name="RegexpHeader"/&gt;
137 * </pre>
138 * <p>
139 * To configure the check to use header file {@code "config/java.header"} and
140 * {@code 10} and {@code 13} multi-lines:
141 * </p>
142 * <pre>
143 * &lt;module name="RegexpHeader"&gt;
144 *   &lt;property name="headerFile" value="config/java.header"/&gt;
145 *   &lt;property name="multiLines" value="10, 13"/&gt;
146 * &lt;/module&gt;
147 * </pre>
148 * <p>
149 * To configure the check to verify that each file starts with the header
150 * </p>
151 * <pre>
152 * ^// Copyright \(C\) (\d\d\d\d -)? 2004 MyCompany$
153 * ^// All rights reserved$
154 * </pre>
155 * <p>
156 * without the need for an external header file:
157 * </p>
158 * <pre>
159 * &lt;module name="RegexpHeader"&gt;
160 *   &lt;property
161 *     name="header"
162 *     value="^// Copyright \(C\) (\d\d\d\d -)? 2004 MyCompany$
163 *       \n^// All rights reserved$"/&gt;
164 * &lt;/module&gt;
165 * </pre>
166 * <p>
167 * For regex containing {@code "\n\n"}
168 * </p>
169 * <pre>
170 * &lt;module name="RegexpHeader"&gt;
171 *   &lt;property
172 *     name="header"
173 *     value="^package .*\n\n.*"/&gt;
174 * &lt;/module&gt;
175 * </pre>
176 * <p>
177 * {@code "\n\n"} will be treated as '^$' and will forcefully expect the line
178 * to be empty. For example -
179 * </p>
180 * <pre>
181 * package com.some.package;
182 * public class ThisWillFail { }
183 * </pre>
184 * <p>
185 * would fail for the regex above. Expected -
186 * </p>
187 * <pre>
188 * package com.some.package;
189 *
190 * public class ThisWillPass { }
191 * </pre>
192 * <p>
193 * <u>Note</u>: {@code ignoreLines} property has been removed from this check to simplify it.
194 * To make some line optional use "^.*$" regexp for this line.
195 * </p>
196 * <p>
197 * Parent is {@code com.puppycrawl.tools.checkstyle.Checker}
198 * </p>
199 * <p>
200 * Violation Message Keys:
201 * </p>
202 * <ul>
203 * <li>
204 * {@code header.mismatch}
205 * </li>
206 * <li>
207 * {@code header.missing}
208 * </li>
209 * </ul>
210 *
211 * @since 6.9
212 */
213@StatelessCheck
214public class RegexpHeaderCheck extends AbstractHeaderCheck {
215
216    /**
217     * A key is pointing to the warning message text in "messages.properties"
218     * file.
219     */
220    public static final String MSG_HEADER_MISSING = "header.missing";
221
222    /**
223     * A key is pointing to the warning message text in "messages.properties"
224     * file.
225     */
226    public static final String MSG_HEADER_MISMATCH = "header.mismatch";
227
228    /** Empty array to avoid instantiations. */
229    private static final int[] EMPTY_INT_ARRAY = new int[0];
230
231    /** Regex pattern for a blank line. **/
232    private static final String EMPTY_LINE_PATTERN = "^$";
233
234    /** Compiled regex pattern for a blank line. **/
235    private static final Pattern BLANK_LINE = Pattern.compile(EMPTY_LINE_PATTERN);
236
237    /** The compiled regular expressions. */
238    private final List<Pattern> headerRegexps = new ArrayList<>();
239
240    /** Specify the line numbers to repeat (zero or more times). */
241    private int[] multiLines = EMPTY_INT_ARRAY;
242
243    /**
244     * Setter to specify the line numbers to repeat (zero or more times).
245     *
246     * @param list comma separated list of line numbers to repeat in header.
247     */
248    public void setMultiLines(int... list) {
249        multiLines = new int[list.length];
250        System.arraycopy(list, 0, multiLines, 0, list.length);
251        Arrays.sort(multiLines);
252    }
253
254    @Override
255    protected void processFiltered(File file, FileText fileText) {
256        final int headerSize = getHeaderLines().size();
257        final int fileSize = fileText.size();
258
259        if (headerSize - multiLines.length > fileSize) {
260            log(1, MSG_HEADER_MISSING);
261        }
262        else {
263            int headerLineNo = 0;
264            int index;
265            for (index = 0; headerLineNo < headerSize && index < fileSize; index++) {
266                final String line = fileText.get(index);
267                boolean isMatch = isMatch(line, headerLineNo);
268                while (!isMatch && isMultiLine(headerLineNo)) {
269                    headerLineNo++;
270                    isMatch = headerLineNo == headerSize
271                            || isMatch(line, headerLineNo);
272                }
273                if (!isMatch) {
274                    log(index + 1, MSG_HEADER_MISMATCH, getHeaderLine(headerLineNo));
275                    break;
276                }
277                if (!isMultiLine(headerLineNo)) {
278                    headerLineNo++;
279                }
280            }
281            if (index == fileSize) {
282                // if file finished, but we have at least one non-multi-line
283                // header isn't completed
284                logFirstSinglelineLine(headerLineNo, headerSize);
285            }
286        }
287    }
288
289    /**
290     * Returns the line from the header. Where the line is blank return the regexp pattern
291     * for a blank line.
292     *
293     * @param headerLineNo header line number to return
294     * @return the line from the header
295     */
296    private String getHeaderLine(int headerLineNo) {
297        String line = getHeaderLines().get(headerLineNo);
298        if (line.isEmpty()) {
299            line = EMPTY_LINE_PATTERN;
300        }
301        return line;
302    }
303
304    /**
305     * Logs warning if any non-multiline lines left in header regexp.
306     *
307     * @param startHeaderLine header line number to start from
308     * @param headerSize whole header size
309     */
310    private void logFirstSinglelineLine(int startHeaderLine, int headerSize) {
311        for (int lineNum = startHeaderLine; lineNum < headerSize; lineNum++) {
312            if (!isMultiLine(lineNum)) {
313                log(1, MSG_HEADER_MISSING);
314                break;
315            }
316        }
317    }
318
319    /**
320     * Checks if a code line matches the required header line.
321     *
322     * @param line the code line
323     * @param headerLineNo the header line number.
324     * @return true if and only if the line matches the required header line.
325     */
326    private boolean isMatch(String line, int headerLineNo) {
327        return headerRegexps.get(headerLineNo).matcher(line).find();
328    }
329
330    /**
331     * Returns true if line is multiline header lines or false.
332     *
333     * @param lineNo a line number
334     * @return if {@code lineNo} is one of the repeat header lines.
335     */
336    private boolean isMultiLine(int lineNo) {
337        return Arrays.binarySearch(multiLines, lineNo + 1) >= 0;
338    }
339
340    @Override
341    protected void postProcessHeaderLines() {
342        final List<String> headerLines = getHeaderLines();
343        for (String line : headerLines) {
344            try {
345                if (line.isEmpty()) {
346                    headerRegexps.add(BLANK_LINE);
347                }
348                else {
349                    headerRegexps.add(Pattern.compile(line));
350                }
351            }
352            catch (final PatternSyntaxException ex) {
353                throw new IllegalArgumentException("line "
354                        + (headerRegexps.size() + 1)
355                        + " in header specification"
356                        + " is not a regular expression", ex);
357            }
358        }
359    }
360
361    /**
362     * Setter to define the required header specified inline.
363     * Individual header lines must be separated by the string {@code "\n"}
364     * (even on platforms with a different line separator).
365     * For header lines containing {@code "\n\n"} checkstyle will forcefully
366     * expect an empty line to exist. See examples below.
367     * Regular expressions must not span multiple lines.
368     *
369     * @param header the header value to validate and set (in that order)
370     */
371    @Override
372    public void setHeader(String header) {
373        if (!CommonUtil.isBlank(header)) {
374            if (!CommonUtil.isPatternValid(header)) {
375                throw new IllegalArgumentException("Unable to parse format: " + header);
376            }
377            super.setHeader(header);
378        }
379    }
380
381}