001//////////////////////////////////////////////////////////////////////////////// 002// checkstyle: Checks Java source code for adherence to a set of rules. 003// Copyright (C) 2001-2022 the original author or authors. 004// 005// This library is free software; you can redistribute it and/or 006// modify it under the terms of the GNU Lesser General Public 007// License as published by the Free Software Foundation; either 008// version 2.1 of the License, or (at your option) any later version. 009// 010// This library is distributed in the hope that it will be useful, 011// but WITHOUT ANY WARRANTY; without even the implied warranty of 012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 013// Lesser General Public License for more details. 014// 015// You should have received a copy of the GNU Lesser General Public 016// License along with this library; if not, write to the Free Software 017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 018//////////////////////////////////////////////////////////////////////////////// 019 020package com.puppycrawl.tools.checkstyle.checks.header; 021 022import java.io.File; 023import java.util.ArrayList; 024import java.util.Arrays; 025import java.util.List; 026import java.util.regex.Pattern; 027import java.util.regex.PatternSyntaxException; 028 029import com.puppycrawl.tools.checkstyle.StatelessCheck; 030import com.puppycrawl.tools.checkstyle.api.FileText; 031import com.puppycrawl.tools.checkstyle.utils.CommonUtil; 032 033/** 034 * <p> 035 * Checks the header of a source file against a header that contains a 036 * <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/Pattern.html"> 037 * pattern</a> for each line of the source header. 038 * </p> 039 * <p> 040 * Rationale: In some projects <a href="https://checkstyle.org/config_header.html#Header"> 041 * checking against a fixed header</a> is not sufficient, e.g. the header might 042 * require a copyright line where the year information is not static. 043 * </p> 044 * <p> 045 * For example, consider the following header: 046 * </p> 047 * <pre> 048 * line 1: ^/{71}$ 049 * line 2: ^// checkstyle:$ 050 * line 3: ^// Checks Java source code for adherence to a set of rules\.$ 051 * line 4: ^// Copyright \(C\) \d\d\d\d Oliver Burn$ 052 * line 5: ^// Last modification by \$Author.*\$$ 053 * line 6: ^/{71}$ 054 * line 7: 055 * line 8: ^package 056 * line 9: 057 * line 10: ^import 058 * line 11: 059 * line 12: ^/\*\* 060 * line 13: ^ \*([^/]|$) 061 * line 14: ^ \*/ 062 * </pre> 063 * <p> 064 * Lines 1 and 6 demonstrate a more compact notation for 71 '/' characters. 065 * Line 4 enforces that the copyright notice includes a four digit year. 066 * Line 5 is an example how to enforce revision control keywords in a file header. 067 * Lines 12-14 is a template for javadoc (line 13 is so complicated to remove 068 * conflict with and of javadoc comment). Lines 7, 9 and 11 will be treated 069 * as '^$' and will forcefully expect the line to be empty. 070 * </p> 071 * <p> 072 * Different programming languages have different comment syntax rules, 073 * but all of them start a comment with a non-word character. 074 * Hence you can often use the non-word character class to abstract away 075 * the concrete comment syntax and allow checking the header for different 076 * languages with a single header definition. For example, consider the following 077 * header specification (note that this is not the full Apache license header): 078 * </p> 079 * <pre> 080 * line 1: ^#! 081 * line 2: ^<\?xml.*>$ 082 * line 3: ^\W*$ 083 * line 4: ^\W*Copyright 2006 The Apache Software Foundation or its licensors, as applicable\.$ 084 * line 5: ^\W*Licensed under the Apache License, Version 2\.0 \(the "License"\);$ 085 * line 6: ^\W*$ 086 * </pre> 087 * <p> 088 * Lines 1 and 2 leave room for technical header lines, e.g. the "#!/bin/sh" 089 * line in Unix shell scripts, or the XML file header of XML files. 090 * Set the multiline property to "1, 2" so these lines can be ignored for 091 * file types where they do no apply. Lines 3 through 6 define the actual header content. 092 * Note how lines 2, 4 and 5 use escapes for characters that have special regexp semantics. 093 * </p> 094 * <p> 095 * In default configuration, if header is not specified, the default value 096 * of header is set to null and the check does not rise any violations. 097 * </p> 098 * <ul> 099 * <li> 100 * Property {@code headerFile} - Specify the name of the file containing the required header. 101 * Type is {@code java.net.URI}. 102 * Default value is {@code null}. 103 * </li> 104 * <li> 105 * Property {@code charset} - Specify the character encoding to use when reading the headerFile. 106 * Type is {@code java.lang.String}. 107 * Default value is {@code the charset property of the parent 108 * <a href="https://checkstyle.org/config.html#Checker">Checker</a> module}. 109 * </li> 110 * <li> 111 * Property {@code header} - Define the required header specified inline. 112 * Individual header lines must be separated by the string {@code "\n"} 113 * (even on platforms with a different line separator). 114 * For header lines containing {@code "\n\n"} checkstyle will 115 * forcefully expect an empty line to exist. See examples below. 116 * Regular expressions must not span multiple lines. 117 * Type is {@code java.lang.String}. 118 * Default value is {@code null}. 119 * </li> 120 * <li> 121 * Property {@code multiLines} - Specify the line numbers to repeat (zero or more times). 122 * Type is {@code int[]}. 123 * Default value is {@code ""}. 124 * </li> 125 * <li> 126 * Property {@code fileExtensions} - Specify the file type extension of files to process. 127 * Type is {@code java.lang.String[]}. 128 * Default value is {@code ""}. 129 * </li> 130 * </ul> 131 * <p> 132 * To configure the check such that no violations arise. 133 * Default values of properties are used. 134 * </p> 135 * <pre> 136 * <module name="RegexpHeader"/> 137 * </pre> 138 * <p> 139 * To configure the check to use header file {@code "config/java.header"} and 140 * {@code 10} and {@code 13} multi-lines: 141 * </p> 142 * <pre> 143 * <module name="RegexpHeader"> 144 * <property name="headerFile" value="config/java.header"/> 145 * <property name="multiLines" value="10, 13"/> 146 * </module> 147 * </pre> 148 * <p> 149 * To configure the check to verify that each file starts with the header 150 * </p> 151 * <pre> 152 * ^// Copyright \(C\) (\d\d\d\d -)? 2004 MyCompany$ 153 * ^// All rights reserved$ 154 * </pre> 155 * <p> 156 * without the need for an external header file: 157 * </p> 158 * <pre> 159 * <module name="RegexpHeader"> 160 * <property 161 * name="header" 162 * value="^// Copyright \(C\) (\d\d\d\d -)? 2004 MyCompany$ 163 * \n^// All rights reserved$"/> 164 * </module> 165 * </pre> 166 * <p> 167 * For regex containing {@code "\n\n"} 168 * </p> 169 * <pre> 170 * <module name="RegexpHeader"> 171 * <property 172 * name="header" 173 * value="^package .*\n\n.*"/> 174 * </module> 175 * </pre> 176 * <p> 177 * {@code "\n\n"} will be treated as '^$' and will forcefully expect the line 178 * to be empty. For example - 179 * </p> 180 * <pre> 181 * package com.some.package; 182 * public class ThisWillFail { } 183 * </pre> 184 * <p> 185 * would fail for the regex above. Expected - 186 * </p> 187 * <pre> 188 * package com.some.package; 189 * 190 * public class ThisWillPass { } 191 * </pre> 192 * <p> 193 * <u>Note</u>: {@code ignoreLines} property has been removed from this check to simplify it. 194 * To make some line optional use "^.*$" regexp for this line. 195 * </p> 196 * <p> 197 * Parent is {@code com.puppycrawl.tools.checkstyle.Checker} 198 * </p> 199 * <p> 200 * Violation Message Keys: 201 * </p> 202 * <ul> 203 * <li> 204 * {@code header.mismatch} 205 * </li> 206 * <li> 207 * {@code header.missing} 208 * </li> 209 * </ul> 210 * 211 * @since 6.9 212 */ 213@StatelessCheck 214public class RegexpHeaderCheck extends AbstractHeaderCheck { 215 216 /** 217 * A key is pointing to the warning message text in "messages.properties" 218 * file. 219 */ 220 public static final String MSG_HEADER_MISSING = "header.missing"; 221 222 /** 223 * A key is pointing to the warning message text in "messages.properties" 224 * file. 225 */ 226 public static final String MSG_HEADER_MISMATCH = "header.mismatch"; 227 228 /** Empty array to avoid instantiations. */ 229 private static final int[] EMPTY_INT_ARRAY = new int[0]; 230 231 /** Regex pattern for a blank line. **/ 232 private static final String EMPTY_LINE_PATTERN = "^$"; 233 234 /** Compiled regex pattern for a blank line. **/ 235 private static final Pattern BLANK_LINE = Pattern.compile(EMPTY_LINE_PATTERN); 236 237 /** The compiled regular expressions. */ 238 private final List<Pattern> headerRegexps = new ArrayList<>(); 239 240 /** Specify the line numbers to repeat (zero or more times). */ 241 private int[] multiLines = EMPTY_INT_ARRAY; 242 243 /** 244 * Setter to specify the line numbers to repeat (zero or more times). 245 * 246 * @param list comma separated list of line numbers to repeat in header. 247 */ 248 public void setMultiLines(int... list) { 249 multiLines = new int[list.length]; 250 System.arraycopy(list, 0, multiLines, 0, list.length); 251 Arrays.sort(multiLines); 252 } 253 254 @Override 255 protected void processFiltered(File file, FileText fileText) { 256 final int headerSize = getHeaderLines().size(); 257 final int fileSize = fileText.size(); 258 259 if (headerSize - multiLines.length > fileSize) { 260 log(1, MSG_HEADER_MISSING); 261 } 262 else { 263 int headerLineNo = 0; 264 int index; 265 for (index = 0; headerLineNo < headerSize && index < fileSize; index++) { 266 final String line = fileText.get(index); 267 boolean isMatch = isMatch(line, headerLineNo); 268 while (!isMatch && isMultiLine(headerLineNo)) { 269 headerLineNo++; 270 isMatch = headerLineNo == headerSize 271 || isMatch(line, headerLineNo); 272 } 273 if (!isMatch) { 274 log(index + 1, MSG_HEADER_MISMATCH, getHeaderLine(headerLineNo)); 275 break; 276 } 277 if (!isMultiLine(headerLineNo)) { 278 headerLineNo++; 279 } 280 } 281 if (index == fileSize) { 282 // if file finished, but we have at least one non-multi-line 283 // header isn't completed 284 logFirstSinglelineLine(headerLineNo, headerSize); 285 } 286 } 287 } 288 289 /** 290 * Returns the line from the header. Where the line is blank return the regexp pattern 291 * for a blank line. 292 * 293 * @param headerLineNo header line number to return 294 * @return the line from the header 295 */ 296 private String getHeaderLine(int headerLineNo) { 297 String line = getHeaderLines().get(headerLineNo); 298 if (line.isEmpty()) { 299 line = EMPTY_LINE_PATTERN; 300 } 301 return line; 302 } 303 304 /** 305 * Logs warning if any non-multiline lines left in header regexp. 306 * 307 * @param startHeaderLine header line number to start from 308 * @param headerSize whole header size 309 */ 310 private void logFirstSinglelineLine(int startHeaderLine, int headerSize) { 311 for (int lineNum = startHeaderLine; lineNum < headerSize; lineNum++) { 312 if (!isMultiLine(lineNum)) { 313 log(1, MSG_HEADER_MISSING); 314 break; 315 } 316 } 317 } 318 319 /** 320 * Checks if a code line matches the required header line. 321 * 322 * @param line the code line 323 * @param headerLineNo the header line number. 324 * @return true if and only if the line matches the required header line. 325 */ 326 private boolean isMatch(String line, int headerLineNo) { 327 return headerRegexps.get(headerLineNo).matcher(line).find(); 328 } 329 330 /** 331 * Returns true if line is multiline header lines or false. 332 * 333 * @param lineNo a line number 334 * @return if {@code lineNo} is one of the repeat header lines. 335 */ 336 private boolean isMultiLine(int lineNo) { 337 return Arrays.binarySearch(multiLines, lineNo + 1) >= 0; 338 } 339 340 @Override 341 protected void postProcessHeaderLines() { 342 final List<String> headerLines = getHeaderLines(); 343 for (String line : headerLines) { 344 try { 345 if (line.isEmpty()) { 346 headerRegexps.add(BLANK_LINE); 347 } 348 else { 349 headerRegexps.add(Pattern.compile(line)); 350 } 351 } 352 catch (final PatternSyntaxException ex) { 353 throw new IllegalArgumentException("line " 354 + (headerRegexps.size() + 1) 355 + " in header specification" 356 + " is not a regular expression", ex); 357 } 358 } 359 } 360 361 /** 362 * Setter to define the required header specified inline. 363 * Individual header lines must be separated by the string {@code "\n"} 364 * (even on platforms with a different line separator). 365 * For header lines containing {@code "\n\n"} checkstyle will forcefully 366 * expect an empty line to exist. See examples below. 367 * Regular expressions must not span multiple lines. 368 * 369 * @param header the header value to validate and set (in that order) 370 */ 371 @Override 372 public void setHeader(String header) { 373 if (!CommonUtil.isBlank(header)) { 374 if (!CommonUtil.isPatternValid(header)) { 375 throw new IllegalArgumentException("Unable to parse format: " + header); 376 } 377 super.setHeader(header); 378 } 379 } 380 381}