///////////////////////////////////////////////////////////////////////////////////////////////
// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
// Copyright (C) 2001-2024 the original author or authors.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
///////////////////////////////////////////////////////////////////////////////////////////////

package com.puppycrawl.tools.checkstyle.checks.header;

import java.io.File;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import com.puppycrawl.tools.checkstyle.StatelessCheck;
import com.puppycrawl.tools.checkstyle.api.FileText;
import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
import com.puppycrawl.tools.checkstyle.utils.TokenUtil;

/**
 * <p>
 * Checks the header of a source file against a header that contains a
 * <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/Pattern.html">
 * pattern</a> for each line of the source header.
 * </p>
 * <p>
 * Rationale: In some projects <a href="https://checkstyle.org/checks/header/header.html#Header">
 * checking against a fixed header</a> is not sufficient, e.g. the header might
 * require a copyright line where the year information is not static.
 * </p>
 * <p>
 * For example, consider the following header:
 * </p>
 * <pre>
 * line 1: ^/{71}$
 * line 2: ^// checkstyle:$
 * line 3: ^// Checks Java source code for adherence to a set of rules\.$
 * line 4: ^// Copyright \(C\) \d\d\d\d Oliver Burn$
 * line 5: ^// Last modification by \$Author.*\$$
 * line 6: ^/{71}$
 * line 7:
 * line 8: ^package
 * line 9:
 * line 10: ^import
 * line 11:
 * line 12: ^/\*\*
 * line 13: ^ \*([^/]|$)
 * line 14: ^ \*&#47;
 * </pre>
 * <p>
 * Lines 1 and 6 demonstrate a more compact notation for 71 '/' characters.
 * Line 4 enforces that the copyright notice includes a four digit year.
 * Line 5 is an example of how to enforce revision control keywords in a file header.
 * Lines 12-14 are a template for javadoc (line 13 is so complicated in order to
 * avoid a conflict with the end of the javadoc comment). Lines 7, 9 and 11 will be
 * treated as '^$' and will forcefully expect the line to be empty.
 * </p>
 * <p>
 * Different programming languages have different comment syntax rules,
 * but all of them start a comment with a non-word character.
 * Hence, you can often use the non-word character class to abstract away
 * the concrete comment syntax and allow checking the header for different
 * languages with a single header definition. For example, consider the following
 * header specification (note that this is not the full Apache license header):
 * </p>
 * <pre>
 * line 1: ^#!
 * line 2: ^&lt;\?xml.*&gt;$
 * line 3: ^\W*$
 * line 4: ^\W*Copyright 2006 The Apache Software Foundation or its licensors, as applicable\.$
 * line 5: ^\W*Licensed under the Apache License, Version 2\.0 \(the "License"\);$
 * line 6: ^\W*$
 * </pre>
 * <p>
 * Lines 1 and 2 leave room for technical header lines, e.g. the "#!/bin/sh"
 * line in Unix shell scripts, or the XML file header of XML files.
 * Set the multiLines property to "1, 2" so these lines can be ignored for
 * file types where they do not apply. Lines 3 through 6 define the actual header content.
 * Note how lines 2, 4 and 5 use escapes for characters that have special regexp semantics.
 * </p>
 * <p>
 * In default configuration, if header is not specified, the default value
 * of header is set to null and the check does not raise any violations.
 * </p>
 * <ul>
 * <li>
 * Property {@code charset} - Specify the character encoding to use when reading the headerFile.
 * Type is {@code java.lang.String}.
 * Default value is {@code the charset property of the parent
 * <a href="https://checkstyle.org/config.html#Checker">Checker</a> module}.
 * </li>
 * <li>
 * Property {@code fileExtensions} - Specify the file extensions of the files to process.
 * Type is {@code java.lang.String[]}.
 * Default value is {@code ""}.
 * </li>
 * <li>
 * Property {@code header} - Define the required header specified inline.
 * Individual header lines must be separated by the string {@code "\n"}
 * (even on platforms with a different line separator).
 * For header lines containing {@code "\n\n"} checkstyle will
 * forcefully expect an empty line to exist. See examples below.
 * Regular expressions must not span multiple lines.
 * Type is {@code java.lang.String}.
 * Default value is {@code null}.
 * </li>
 * <li>
 * Property {@code headerFile} - Specify the name of the file containing the required header.
 * Type is {@code java.net.URI}.
 * Default value is {@code null}.
 * </li>
 * <li>
 * Property {@code multiLines} - Specify the line numbers to repeat (zero or more times).
 * Type is {@code int[]}.
 * Default value is {@code ""}.
 * </li>
 * </ul>
 * <p>
 * Parent is {@code com.puppycrawl.tools.checkstyle.Checker}
 * </p>
 * <p>
 * Violation Message Keys:
 * </p>
 * <ul>
 * <li>
 * {@code header.mismatch}
 * </li>
 * <li>
 * {@code header.missing}
 * </li>
 * </ul>
 *
 * @since 6.9
 */
@StatelessCheck
public class RegexpHeaderCheck extends AbstractHeaderCheck {

    /**
     * Key of the "header missing" violation message in the "messages.properties" file.
     */
    public static final String MSG_HEADER_MISSING = "header.missing";

    /**
     * Key of the "header mismatch" violation message in the "messages.properties" file.
     */
    public static final String MSG_HEADER_MISMATCH = "header.mismatch";

    /** Regular expression source that matches an empty line. */
    private static final String EMPTY_LINE_PATTERN = "^$";

    /** Pre-compiled form of {@link #EMPTY_LINE_PATTERN}. */
    private static final Pattern BLANK_LINE = Pattern.compile(EMPTY_LINE_PATTERN);

    /** Compiled patterns, one per header line, in header order. */
    private final List<Pattern> headerRegexps = new ArrayList<>();

    /** Specify the line numbers to repeat (zero or more times). */
    private BitSet multiLines = new BitSet();

    /**
     * Setter to specify the line numbers to repeat (zero or more times).
     *
     * @param list line numbers to repeat in header.
     * @since 3.4
     */
    public void setMultiLines(int... list) {
        multiLines = TokenUtil.asBitSet(list);
    }

    /**
     * Compares the leading lines of the file with the header patterns and logs
     * {@link #MSG_HEADER_MISSING} (at line 1) or {@link #MSG_HEADER_MISMATCH}
     * (at the first file line that fails to match).
     *
     * @param file the file being processed (not read by this method)
     * @param fileText the lines of the file being processed
     */
    @Override
    protected void processFiltered(File file, FileText fileText) {
        final int headerSize = getHeaderLines().size();
        final int fileSize = fileText.size();

        if (headerSize - multiLines.cardinality() > fileSize) {
            // The file has fewer lines than the header has after discounting
            // the configured multi-line entries.
            log(1, MSG_HEADER_MISSING);
            return;
        }

        int headerLineNo = 0;
        int fileLineIdx = 0;
        while (headerLineNo < headerSize && fileLineIdx < fileSize) {
            final String currentLine = fileText.get(fileLineIdx);
            boolean matched = isMatch(currentLine, headerLineNo);
            // A repeatable header line that does not match is skipped; running past
            // the last header line counts as a match.
            while (!matched && isMultiLine(headerLineNo)) {
                headerLineNo++;
                matched = headerLineNo == headerSize
                        || isMatch(currentLine, headerLineNo);
            }
            if (!matched) {
                log(fileLineIdx + 1, MSG_HEADER_MISMATCH, getHeaderLine(headerLineNo));
                break;
            }
            // A repeatable header line stays current so that it can match the next
            // file line as well.
            if (!isMultiLine(headerLineNo)) {
                headerLineNo++;
            }
            fileLineIdx++;
        }

        if (fileLineIdx == fileSize) {
            // The file ran out of lines; report if a non-repeatable header line
            // is still pending.
            logFirstSinglelineLine(headerLineNo, headerSize);
        }
    }

    /**
     * Returns the header line at the given index, substituting the blank-line
     * regexp source for an empty header line.
     *
     * @param headerLineNo zero-based index of the header line to return
     * @return the header line, or {@code "^$"} when that line is empty
     */
    private String getHeaderLine(int headerLineNo) {
        final String headerLine = getHeaderLines().get(headerLineNo);
        final String result;
        if (headerLine.isEmpty()) {
            result = EMPTY_LINE_PATTERN;
        }
        else {
            result = headerLine;
        }
        return result;
    }

    /**
     * Logs {@link #MSG_HEADER_MISSING} once, at line 1, if any header line from
     * {@code startHeaderLine} (inclusive) up to {@code headerSize} (exclusive)
     * is not a multi-line entry.
     *
     * @param startHeaderLine zero-based header line index to start from
     * @param headerSize whole header size
     */
    private void logFirstSinglelineLine(int startHeaderLine, int headerSize) {
        // multiLines is indexed by one-based line number, so header index i maps to
        // bit i + 1 and the examined bits are startHeaderLine + 1 .. headerSize.
        final int firstSingleLineNo = multiLines.nextClearBit(startHeaderLine + 1);
        if (firstSingleLineNo <= headerSize) {
            log(1, MSG_HEADER_MISSING);
        }
    }

    /**
     * Checks if a code line matches the required header line.
     *
     * @param line the code line
     * @param headerLineNo zero-based index of the header line
     * @return true if and only if the compiled header pattern is found in the line
     */
    private boolean isMatch(String line, int headerLineNo) {
        final Pattern headerPattern = headerRegexps.get(headerLineNo);
        return headerPattern.matcher(line).find();
    }

    /**
     * Tells whether the header line at the given index is a repeatable line.
     *
     * @param lineNo zero-based index of the header line
     * @return true if the one-based number of that line is set in {@code multiLines}
     */
    private boolean isMultiLine(int lineNo) {
        final int oneBasedLineNo = lineNo + 1;
        return multiLines.get(oneBasedLineNo);
    }

    /**
     * Compiles every header line and appends the resulting patterns to
     * {@code headerRegexps} in header order.
     *
     * @throws IllegalArgumentException if a header line is not a valid regular expression
     */
    @Override
    protected void postProcessHeaderLines() {
        for (String headerLine : getHeaderLines()) {
            headerRegexps.add(compileHeaderLine(headerLine));
        }
    }

    /**
     * Turns one header line into its pattern; an empty line yields the shared
     * blank-line pattern.
     *
     * @param headerLine regexp source of the header line
     * @return the pattern for the header line
     * @throws IllegalArgumentException if the line is not a valid regular expression
     */
    private Pattern compileHeaderLine(String headerLine) {
        final Pattern result;
        if (headerLine.isEmpty()) {
            result = BLANK_LINE;
        }
        else {
            try {
                result = Pattern.compile(headerLine);
            }
            catch (final PatternSyntaxException ex) {
                // The patterns collected so far give the one-based number of the
                // line that failed to compile.
                throw new IllegalArgumentException("line "
                        + (headerRegexps.size() + 1)
                        + " in header specification"
                        + " is not a regular expression", ex);
            }
        }
        return result;
    }

    /**
     * Setter to define the required header specified inline.
     * Individual header lines must be separated by the string {@code "\n"}
     * (even on platforms with a different line separator).
     * For header lines containing {@code "\n\n"} checkstyle will forcefully
     * expect an empty line to exist. See examples below.
     * Regular expressions must not span multiple lines.
     *
     * @param header the header value to validate and set (in that order)
     * @throws IllegalArgumentException if the non-blank header is not a valid pattern
     * @since 5.0
     */
    @Override
    public void setHeader(String header) {
        if (CommonUtil.isBlank(header)) {
            // A blank header is ignored and leaves the check unconfigured.
            return;
        }
        if (!CommonUtil.isPatternValid(header)) {
            throw new IllegalArgumentException("Unable to parse format: " + header);
        }
        super.setHeader(header);
    }

}