View Javadoc
1   ///////////////////////////////////////////////////////////////////////////////////////////////
2   // checkstyle: Checks Java source code and other text files for adherence to a set of rules.
3   // Copyright (C) 2001-2024 the original author or authors.
4   //
5   // This library is free software; you can redistribute it and/or
6   // modify it under the terms of the GNU Lesser General Public
7   // License as published by the Free Software Foundation; either
8   // version 2.1 of the License, or (at your option) any later version.
9   //
10  // This library is distributed in the hope that it will be useful,
11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  // Lesser General Public License for more details.
14  //
15  // You should have received a copy of the GNU Lesser General Public
16  // License along with this library; if not, write to the Free Software
17  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  ///////////////////////////////////////////////////////////////////////////////////////////////
19  
20  package com.puppycrawl.tools.checkstyle.api;
21  
22  import java.io.BufferedReader;
23  import java.io.File;
24  import java.io.FileNotFoundException;
25  import java.io.IOException;
26  import java.io.InputStream;
27  import java.io.InputStreamReader;
28  import java.io.Reader;
29  import java.io.StringReader;
30  import java.nio.charset.Charset;
31  import java.nio.charset.CharsetDecoder;
32  import java.nio.charset.CodingErrorAction;
33  import java.nio.charset.UnsupportedCharsetException;
34  import java.nio.file.Files;
35  import java.util.ArrayList;
36  import java.util.Arrays;
37  import java.util.List;
38  import java.util.regex.Matcher;
39  import java.util.regex.Pattern;
40  
41  import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
42  
43  /**
44   * Represents the text contents of a file of arbitrary plain text type.
45   *
46   * <p>
47   * This class will be passed to instances of class FileSetCheck by
48   * Checker.
49   * </p>
50   *
51   */
52  public final class FileText {
53  
54      /**
55       * The number of characters to read in one go.
56       */
57      private static final int READ_BUFFER_SIZE = 1024;
58  
59      /**
60       * Regular expression pattern matching all line terminators.
61       */
62      private static final Pattern LINE_TERMINATOR = Pattern.compile("\\n|\\r\\n?");
63  
64      // For now, we always keep both full text and lines array.
65      // In the long run, however, the one passed at initialization might be
66      // enough, while the other could be lazily created when requested.
67      // This would save memory but cost CPU cycles.
68  
69      /**
70       * The name of the file.
71       * {@code null} if no file name is available for whatever reason.
72       */
73      private final File file;
74  
75      /**
76       * The charset used to read the file.
77       * {@code null} if the file was reconstructed from a list of lines.
78       */
79      private final Charset charset;
80  
81      /**
82       * The lines of the file, without terminators.
83       */
84      private final String[] lines;
85  
86      /**
87       * The full text contents of the file.
88       *
89       * @noinspection FieldMayBeFinal
90       * @noinspectionreason FieldMayBeFinal - field is not final to ease reaching full test coverage.
91       */
92      private String fullText;
93  
94      /**
95       * The first position of each line within the full text.
96       */
97      private int[] lineBreaks;
98  
99      /**
100      * Copy constructor.
101      *
102      * @param fileText to make copy of
103      */
104     public FileText(FileText fileText) {
105         file = fileText.file;
106         charset = fileText.charset;
107         fullText = fileText.fullText;
108         lines = fileText.lines.clone();
109         if (fileText.lineBreaks != null) {
110             lineBreaks = fileText.lineBreaks.clone();
111         }
112     }
113 
114     /**
115      * Compatibility constructor.
116      *
117      * <p>This constructor reconstructs the text of the file by joining
118      * lines with linefeed characters. This process does not restore
119      * the original line terminators and should therefore be avoided.
120      *
121      * @param file the name of the file
122      * @param lines the lines of the text, without terminators
123      * @throws NullPointerException if the lines array is null
124      */
125     public FileText(File file, List<String> lines) {
126         final StringBuilder buf = new StringBuilder(1024);
127         for (final String line : lines) {
128             buf.append(line).append('\n');
129         }
130 
131         this.file = file;
132         charset = null;
133         fullText = buf.toString();
134         this.lines = lines.toArray(CommonUtil.EMPTY_STRING_ARRAY);
135     }
136 
137     /**
138      * Creates a new file text representation.
139      *
140      * <p>The file will be read using the specified encoding, replacing
141      * malformed input and unmappable characters with the default
142      * replacement character.
143      *
144      * @param file the name of the file
145      * @param charsetName the encoding to use when reading the file
146      * @throws NullPointerException if the text is null
147      * @throws IllegalStateException if the charset is not supported.
148      * @throws IOException if the file could not be read
149      */
150     public FileText(File file, String charsetName) throws IOException {
151         this.file = file;
152 
153         // We use our own decoder, to be sure we have complete control
154         // about replacements.
155         final CharsetDecoder decoder;
156         try {
157             charset = Charset.forName(charsetName);
158             decoder = charset.newDecoder();
159             decoder.onMalformedInput(CodingErrorAction.REPLACE);
160             decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
161         }
162         catch (final UnsupportedCharsetException ex) {
163             final String message = "Unsupported charset: " + charsetName;
164             throw new IllegalStateException(message, ex);
165         }
166 
167         fullText = readFile(file, decoder);
168 
169         // Use the BufferedReader to break down the lines as this
170         // is about 30% faster than using the
171         // LINE_TERMINATOR.split(fullText, -1) method
172         try (BufferedReader reader = new BufferedReader(new StringReader(fullText))) {
173             final ArrayList<String> textLines = new ArrayList<>();
174             while (true) {
175                 final String line = reader.readLine();
176                 if (line == null) {
177                     break;
178                 }
179                 textLines.add(line);
180             }
181             lines = textLines.toArray(CommonUtil.EMPTY_STRING_ARRAY);
182         }
183     }
184 
185     /**
186      * Reads file using specific decoder and returns all its content as a String.
187      *
188      * @param inputFile File to read
189      * @param decoder Charset decoder
190      * @return File's text
191      * @throws IOException Unable to open or read the file
192      * @throws FileNotFoundException when inputFile does not exist
193      */
194     private static String readFile(final File inputFile, final CharsetDecoder decoder)
195             throws IOException {
196         if (!inputFile.exists()) {
197             throw new FileNotFoundException(inputFile.getPath() + " (No such file or directory)");
198         }
199         final StringBuilder buf = new StringBuilder(1024);
200         final InputStream stream = Files.newInputStream(inputFile.toPath());
201         try (Reader reader = new InputStreamReader(stream, decoder)) {
202             final char[] chars = new char[READ_BUFFER_SIZE];
203             while (true) {
204                 final int len = reader.read(chars);
205                 if (len == -1) {
206                     break;
207                 }
208                 buf.append(chars, 0, len);
209             }
210         }
211         return buf.toString();
212     }
213 
214     /**
215      * Retrieves a line of the text by its number.
216      * The returned line will not contain a trailing terminator.
217      *
218      * @param lineNo the number of the line to get, starting at zero
219      * @return the line with the given number
220      */
221     public String get(final int lineNo) {
222         return lines[lineNo];
223     }
224 
225     /**
226      * Get the name of the file.
227      *
228      * @return an object containing the name of the file
229      */
230     public File getFile() {
231         return file;
232     }
233 
234     /**
235      * Get the character set which was used to read the file.
236      * Will be {@code null} for a file reconstructed from its lines.
237      *
238      * @return the charset used when the file was read
239      */
240     public Charset getCharset() {
241         return charset;
242     }
243 
244     /**
245      * Retrieve the full text of the file.
246      *
247      * @return the full text of the file
248      */
249     public CharSequence getFullText() {
250         return fullText;
251     }
252 
253     /**
254      * Returns an array of all lines.
255      * {@code text.toLinesArray()} is equivalent to
256      * {@code text.toArray(new String[text.size()])}.
257      *
258      * @return an array of all lines of the text
259      */
260     public String[] toLinesArray() {
261         return lines.clone();
262     }
263 
264     /**
265      * Determine line and column numbers in full text.
266      *
267      * @param pos the character position in the full text
268      * @return the line and column numbers of this character
269      */
270     public LineColumn lineColumn(int pos) {
271         final int[] lineBreakPositions = findLineBreaks();
272         int lineNo = Arrays.binarySearch(lineBreakPositions, pos);
273         if (lineNo < 0) {
274             // we have: lineNo = -(insertion point) - 1
275             // we want: lineNo =  (insertion point) - 1
276             lineNo = -lineNo - 2;
277         }
278         final int startOfLine = lineBreakPositions[lineNo];
279         final int columnNo = pos - startOfLine;
280         // now we have lineNo and columnNo, both starting at zero.
281         return new LineColumn(lineNo + 1, columnNo);
282     }
283 
284     /**
285      * Find positions of line breaks in the full text.
286      *
287      * @return an array giving the first positions of each line.
288      */
289     private int[] findLineBreaks() {
290         if (lineBreaks == null) {
291             final int[] lineBreakPositions = new int[size() + 1];
292             lineBreakPositions[0] = 0;
293             int lineNo = 1;
294             final Matcher matcher = LINE_TERMINATOR.matcher(fullText);
295             while (matcher.find()) {
296                 lineBreakPositions[lineNo] = matcher.end();
297                 lineNo++;
298             }
299             if (lineNo < lineBreakPositions.length) {
300                 lineBreakPositions[lineNo] = fullText.length();
301             }
302             lineBreaks = lineBreakPositions;
303         }
304         return lineBreaks;
305     }
306 
307     /**
308      * Counts the lines of the text.
309      *
310      * @return the number of lines in the text
311      */
312     public int size() {
313         return lines.length;
314     }
315 
316 }