View Javadoc
1   ///////////////////////////////////////////////////////////////////////////////////////////////
2   // checkstyle: Checks Java source code and other text files for adherence to a set of rules.
3   // Copyright (C) 2001-2024 the original author or authors.
4   //
5   // This library is free software; you can redistribute it and/or
6   // modify it under the terms of the GNU Lesser General Public
7   // License as published by the Free Software Foundation; either
8   // version 2.1 of the License, or (at your option) any later version.
9   //
10  // This library is distributed in the hope that it will be useful,
11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  // Lesser General Public License for more details.
14  //
15  // You should have received a copy of the GNU Lesser General Public
16  // License along with this library; if not, write to the Free Software
17  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  ///////////////////////////////////////////////////////////////////////////////////////////////
19  
20  package com.puppycrawl.tools.checkstyle.checks.header;
21  
22  import java.io.File;
23  import java.util.ArrayList;
24  import java.util.BitSet;
25  import java.util.List;
26  import java.util.regex.Pattern;
27  import java.util.regex.PatternSyntaxException;
28  
29  import com.puppycrawl.tools.checkstyle.StatelessCheck;
30  import com.puppycrawl.tools.checkstyle.api.FileText;
31  import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
32  import com.puppycrawl.tools.checkstyle.utils.TokenUtil;
33  
34  /**
35   * <p>
36   * Checks the header of a source file against a header that contains a
37   * <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/Pattern.html">
38   * pattern</a> for each line of the source header.
39   * </p>
40   * <p>
41   * Rationale: In some projects <a href="https://checkstyle.org/checks/header/header.html#Header">
42   * checking against a fixed header</a> is not sufficient, e.g. the header might
43   * require a copyright line where the year information is not static.
44   * </p>
45   * <p>
46   * For example, consider the following header:
47   * </p>
48   * <pre>
49   * line  1: ^/{71}$
50   * line  2: ^// checkstyle:$
51   * line  3: ^// Checks Java source code for adherence to a set of rules\.$
52   * line  4: ^// Copyright \(C\) \d\d\d\d  Oliver Burn$
53   * line  5: ^// Last modification by \$Author.*\$$
54   * line  6: ^/{71}$
55   * line  7:
56   * line  8: ^package
57   * line  9:
58   * line 10: ^import
59   * line 11:
60   * line 12: ^/\*\*
61   * line 13: ^ \*([^/]|$)
62   * line 14: ^ \*&#47;
63   * </pre>
64   * <p>
65   * Lines 1 and 6 demonstrate a more compact notation for 71 '/' characters.
66   * Line 4 enforces that the copyright notice includes a four digit year.
67   * Line 5 is an example of how to enforce revision control keywords in a file header.
68   * Lines 12-14 are a template for javadoc (line 13 is this complicated to avoid a
69   * conflict with the end of the javadoc comment). Lines 7, 9 and 11 will be treated
70   * as '^$' and will forcefully expect the line to be empty.
71   * </p>
72   * <p>
73   * Different programming languages have different comment syntax rules,
74   * but all of them start a comment with a non-word character.
75   * Hence, you can often use the non-word character class to abstract away
76   * the concrete comment syntax and allow checking the header for different
77   * languages with a single header definition. For example, consider the following
78   * header specification (note that this is not the full Apache license header):
79   * </p>
80   * <pre>
81   * line 1: ^#!
82   * line 2: ^&lt;\?xml.*&gt;$
83   * line 3: ^\W*$
84   * line 4: ^\W*Copyright 2006 The Apache Software Foundation or its licensors, as applicable\.$
85   * line 5: ^\W*Licensed under the Apache License, Version 2\.0 \(the "License"\);$
86   * line 6: ^\W*$
87   * </pre>
88   * <p>
89   * Lines 1 and 2 leave room for technical header lines, e.g. the "#!/bin/sh"
90   * line in Unix shell scripts, or the XML file header of XML files.
91   * Set the multiLines property to "1, 2" so these lines can be ignored for
92   * file types where they do not apply. Lines 3 through 6 define the actual header content.
93   * Note how lines 2, 4 and 5 use escapes for characters that have special regexp semantics.
94   * </p>
95   * <p>
96   * In the default configuration, if header is not specified, the default value
97   * of header is set to null and the check does not raise any violations.
98   * </p>
99   * <ul>
100  * <li>
101  * Property {@code charset} - Specify the character encoding to use when reading the headerFile.
102  * Type is {@code java.lang.String}.
103  * Default value is {@code the charset property of the parent
104  * <a href="https://checkstyle.org/config.html#Checker">Checker</a> module}.
105  * </li>
106  * <li>
107  * Property {@code fileExtensions} - Specify the file extensions of the files to process.
108  * Type is {@code java.lang.String[]}.
109  * Default value is {@code ""}.
110  * </li>
111  * <li>
112  * Property {@code header} - Define the required header specified inline.
113  * Individual header lines must be separated by the string {@code "\n"}
114  * (even on platforms with a different line separator).
115  * For header lines containing {@code "\n\n"} checkstyle will
116  * forcefully expect an empty line to exist. See examples below.
117  * Regular expressions must not span multiple lines.
118  * Type is {@code java.lang.String}.
119  * Default value is {@code null}.
120  * </li>
121  * <li>
122  * Property {@code headerFile} - Specify the name of the file containing the required header.
123  * Type is {@code java.net.URI}.
124  * Default value is {@code null}.
125  * </li>
126  * <li>
127  * Property {@code multiLines} - Specify the line numbers to repeat (zero or more times).
128  * Type is {@code int[]}.
129  * Default value is {@code ""}.
130  * </li>
131  * </ul>
132  * <p>
133  * Parent is {@code com.puppycrawl.tools.checkstyle.Checker}
134  * </p>
135  * <p>
136  * Violation Message Keys:
137  * </p>
138  * <ul>
139  * <li>
140  * {@code header.mismatch}
141  * </li>
142  * <li>
143  * {@code header.missing}
144  * </li>
145  * </ul>
146  *
147  * @since 6.9
148  */
149 @StatelessCheck
150 public class RegexpHeaderCheck extends AbstractHeaderCheck {
151 
152     /**
153      * A key is pointing to the warning message text in "messages.properties"
154      * file.
155      */
156     public static final String MSG_HEADER_MISSING = "header.missing";
157 
158     /**
159      * A key is pointing to the warning message text in "messages.properties"
160      * file.
161      */
162     public static final String MSG_HEADER_MISMATCH = "header.mismatch";
163 
164     /** Regex pattern for a blank line. **/
165     private static final String EMPTY_LINE_PATTERN = "^$";
166 
167     /** Compiled regex pattern for a blank line. **/
168     private static final Pattern BLANK_LINE = Pattern.compile(EMPTY_LINE_PATTERN);
169 
170     /** The compiled regular expressions. */
171     private final List<Pattern> headerRegexps = new ArrayList<>();
172 
173     /** Specify the line numbers to repeat (zero or more times). */
174     private BitSet multiLines = new BitSet();
175 
176     /**
177      * Setter to specify the line numbers to repeat (zero or more times).
178      *
179      * @param list line numbers to repeat in header.
180      * @since 3.4
181      */
182     public void setMultiLines(int... list) {
183         multiLines = TokenUtil.asBitSet(list);
184     }
185 
186     @Override
187     protected void processFiltered(File file, FileText fileText) {
188         final int headerSize = getHeaderLines().size();
189         final int fileSize = fileText.size();
190 
191         if (headerSize - multiLines.cardinality() > fileSize) {
192             log(1, MSG_HEADER_MISSING);
193         }
194         else {
195             int headerLineNo = 0;
196             int index;
197             for (index = 0; headerLineNo < headerSize && index < fileSize; index++) {
198                 final String line = fileText.get(index);
199                 boolean isMatch = isMatch(line, headerLineNo);
200                 while (!isMatch && isMultiLine(headerLineNo)) {
201                     headerLineNo++;
202                     isMatch = headerLineNo == headerSize
203                             || isMatch(line, headerLineNo);
204                 }
205                 if (!isMatch) {
206                     log(index + 1, MSG_HEADER_MISMATCH, getHeaderLine(headerLineNo));
207                     break;
208                 }
209                 if (!isMultiLine(headerLineNo)) {
210                     headerLineNo++;
211                 }
212             }
213             if (index == fileSize) {
214                 // if file finished, but we have at least one non-multi-line
215                 // header isn't completed
216                 logFirstSinglelineLine(headerLineNo, headerSize);
217             }
218         }
219     }
220 
221     /**
222      * Returns the line from the header. Where the line is blank return the regexp pattern
223      * for a blank line.
224      *
225      * @param headerLineNo header line number to return
226      * @return the line from the header
227      */
228     private String getHeaderLine(int headerLineNo) {
229         String line = getHeaderLines().get(headerLineNo);
230         if (line.isEmpty()) {
231             line = EMPTY_LINE_PATTERN;
232         }
233         return line;
234     }
235 
236     /**
237      * Logs warning if any non-multiline lines left in header regexp.
238      *
239      * @param startHeaderLine header line number to start from
240      * @param headerSize whole header size
241      */
242     private void logFirstSinglelineLine(int startHeaderLine, int headerSize) {
243         for (int lineNum = startHeaderLine; lineNum < headerSize; lineNum++) {
244             if (!isMultiLine(lineNum)) {
245                 log(1, MSG_HEADER_MISSING);
246                 break;
247             }
248         }
249     }
250 
251     /**
252      * Checks if a code line matches the required header line.
253      *
254      * @param line the code line
255      * @param headerLineNo the header line number.
256      * @return true if and only if the line matches the required header line.
257      */
258     private boolean isMatch(String line, int headerLineNo) {
259         return headerRegexps.get(headerLineNo).matcher(line).find();
260     }
261 
262     /**
263      * Returns true if line is multiline header lines or false.
264      *
265      * @param lineNo a line number
266      * @return if {@code lineNo} is one of the repeat header lines.
267      */
268     private boolean isMultiLine(int lineNo) {
269         return multiLines.get(lineNo + 1);
270     }
271 
272     @Override
273     protected void postProcessHeaderLines() {
274         final List<String> headerLines = getHeaderLines();
275         for (String line : headerLines) {
276             try {
277                 if (line.isEmpty()) {
278                     headerRegexps.add(BLANK_LINE);
279                 }
280                 else {
281                     headerRegexps.add(Pattern.compile(line));
282                 }
283             }
284             catch (final PatternSyntaxException ex) {
285                 throw new IllegalArgumentException("line "
286                         + (headerRegexps.size() + 1)
287                         + " in header specification"
288                         + " is not a regular expression", ex);
289             }
290         }
291     }
292 
293     /**
294      * Setter to define the required header specified inline.
295      * Individual header lines must be separated by the string {@code "\n"}
296      * (even on platforms with a different line separator).
297      * For header lines containing {@code "\n\n"} checkstyle will forcefully
298      * expect an empty line to exist. See examples below.
299      * Regular expressions must not span multiple lines.
300      *
301      * @param header the header value to validate and set (in that order)
302      * @since 5.0
303      */
304     @Override
305     public void setHeader(String header) {
306         if (!CommonUtil.isBlank(header)) {
307             if (!CommonUtil.isPatternValid(header)) {
308                 throw new IllegalArgumentException("Unable to parse format: " + header);
309             }
310             super.setHeader(header);
311         }
312     }
313 
314 }