001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.input;
018
019import java.io.Closeable;
020import java.io.File;
021import java.io.IOException;
022import java.io.UnsupportedEncodingException;
023import java.nio.ByteBuffer;
024import java.nio.channels.SeekableByteChannel;
025import java.nio.charset.Charset;
026import java.nio.charset.CharsetEncoder;
027import java.nio.charset.StandardCharsets;
028import java.nio.file.Files;
029import java.nio.file.Path;
030import java.nio.file.StandardOpenOption;
031import java.util.ArrayList;
032import java.util.Arrays;
033import java.util.Collections;
034import java.util.List;
035
036import org.apache.commons.io.Charsets;
037import org.apache.commons.io.FileSystem;
038import org.apache.commons.io.StandardLineSeparator;
039import org.apache.commons.io.build.AbstractOrigin;
040import org.apache.commons.io.build.AbstractStreamBuilder;
041
042/**
043 * Reads lines in a file reversely (similar to a BufferedReader, but starting at the last line). Useful for e.g. searching in log files.
044 * <p>
045 * To build an instance, see {@link Builder}.
046 * </p>
047 *
048 * @since 2.2
049 */
050public class ReversedLinesFileReader implements Closeable {
051
052    /**
053     * Builds a new {@link ReversedLinesFileReader} instance.
054     * <p>
055     * For example:
056     * </p>
057     * <pre>{@code
058     * ReversedLinesFileReader r = ReversedLinesFileReader.builder()
059     *   .setPath(path)
060     *   .setBufferSize(4096)
061     *   .setCharset(StandardCharsets.UTF_8)
062     *   .get();}
063     * </pre>
064     *
065     * @since 2.12.0
066     */
067    public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> {
068
069        public Builder() {
070            setBufferSizeDefault(DEFAULT_BLOCK_SIZE);
071            setBufferSize(DEFAULT_BLOCK_SIZE);
072        }
073
074        /**
075         * Constructs a new instance.
076         * <p>
077         * This builder use the aspects Path, Charset, buffer size.
078         * </p>
079         * <p>
080         * You must provide an origin that can be converted to a Path by this builder, otherwise, this call will throw an
081         * {@link UnsupportedOperationException}.
082         * </p>
083         *
084         * @return a new instance.
085         * @throws UnsupportedOperationException if the origin cannot provide a Path.
086         * @see AbstractOrigin#getPath()
087         */
088        @Override
089        public ReversedLinesFileReader get() throws IOException {
090            return new ReversedLinesFileReader(getPath(), getBufferSize(), getCharset());
091        }
092
093    }
094
095    private class FilePart {
096        private final long no;
097
098        private final byte[] data;
099
100        private byte[] leftOver;
101
102        private int currentLastBytePos;
103
104        /**
105         * Constructs a new instance.
106         *
107         * @param no                     the part number
108         * @param length                 its length
109         * @param leftOverOfLastFilePart remainder
110         * @throws IOException if there is a problem reading the file
111         */
112        private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException {
113            this.no = no;
114            final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0);
115            this.data = new byte[dataLength];
116            final long off = (no - 1) * blockSize;
117
118            // read data
119            if (no > 0 /* file not empty */) {
120                channel.position(off);
121                final int countRead = channel.read(ByteBuffer.wrap(data, 0, length));
122                if (countRead != length) {
123                    throw new IllegalStateException("Count of requested bytes and actually read bytes don't match");
124                }
125            }
126            // copy left over part into data arr
127            if (leftOverOfLastFilePart != null) {
128                System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length);
129            }
130            this.currentLastBytePos = data.length - 1;
131            this.leftOver = null;
132        }
133
134        /**
135         * Constructs the buffer containing any leftover bytes.
136         */
137        private void createLeftOver() {
138            final int lineLengthBytes = currentLastBytePos + 1;
139            if (lineLengthBytes > 0) {
140                // create left over for next block
141                leftOver = Arrays.copyOf(data, lineLengthBytes);
142            } else {
143                leftOver = null;
144            }
145            currentLastBytePos = -1;
146        }
147
148        /**
149         * Finds the new-line sequence and return its length.
150         *
151         * @param data buffer to scan
152         * @param i    start offset in buffer
153         * @return length of newline sequence or 0 if none found
154         */
155        private int getNewLineMatchByteCount(final byte[] data, final int i) {
156            for (final byte[] newLineSequence : newLineSequences) {
157                boolean match = true;
158                for (int j = newLineSequence.length - 1; j >= 0; j--) {
159                    final int k = i + j - (newLineSequence.length - 1);
160                    match &= k >= 0 && data[k] == newLineSequence[j];
161                }
162                if (match) {
163                    return newLineSequence.length;
164                }
165            }
166            return 0;
167        }
168
169        /**
170         * Reads a line.
171         *
172         * @return the line or null
173         */
174        private String readLine() { //NOPMD Bug in PMD
175
176            String line = null;
177            int newLineMatchByteCount;
178
179            final boolean isLastFilePart = no == 1;
180
181            int i = currentLastBytePos;
182            while (i > -1) {
183
184                if (!isLastFilePart && i < avoidNewlineSplitBufferSize) {
185                    // avoidNewlineSplitBuffer: for all except the last file part we
186                    // take a few bytes to the next file part to avoid splitting of newlines
187                    createLeftOver();
188                    break; // skip last few bytes and leave it to the next file part
189                }
190
191                // --- check for newline ---
192                if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) {
193                    final int lineStart = i + 1;
194                    final int lineLengthBytes = currentLastBytePos - lineStart + 1;
195
196                    if (lineLengthBytes < 0) {
197                        throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes);
198                    }
199                    final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes);
200
201                    line = new String(lineData, charset);
202
203                    currentLastBytePos = i - newLineMatchByteCount;
204                    break; // found line
205                }
206
207                // --- move cursor ---
208                i -= byteDecrement;
209
210                // --- end of file part handling ---
211                if (i < 0) {
212                    createLeftOver();
213                    break; // end of file part
214                }
215            }
216
217            // --- last file part handling ---
218            if (isLastFilePart && leftOver != null) {
219                // there will be no line break anymore, this is the first line of the file
220                line = new String(leftOver, charset);
221                leftOver = null;
222            }
223
224            return line;
225        }
226
227        /**
228         * Handles block rollover
229         *
230         * @return the new FilePart or null
231         * @throws IOException if there was a problem reading the file
232         */
233        private FilePart rollOver() throws IOException {
234
235            if (currentLastBytePos > -1) {
236                throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... "
237                        + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos);
238            }
239
240            if (no > 1) {
241                return new FilePart(no - 1, blockSize, leftOver);
242            }
243            // NO 1 was the last FilePart, we're finished
244            if (leftOver != null) {
245                throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart="
246                        + new String(leftOver, charset));
247            }
248            return null;
249        }
250    }
251
252    private static final String EMPTY_STRING = "";
253
254    private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize();
255
256    /**
257     * Constructs a new {@link Builder}.
258     *
259     * @return a new {@link Builder}.
260     * @since 2.12.0
261     */
262    public static Builder builder() {
263        return new Builder();
264    }
265
266    private final int blockSize;
267    private final Charset charset;
268    private final SeekableByteChannel channel;
269    private final long totalByteLength;
270    private final long totalBlockCount;
271    private final byte[][] newLineSequences;
272    private final int avoidNewlineSplitBufferSize;
273    private final int byteDecrement;
274    private FilePart currentFilePart;
275    private boolean trailingNewlineOfFileSkipped;
276
277    /**
278     * Constructs a ReversedLinesFileReader with default block size of 4KB and the
279     * platform's default encoding.
280     *
281     * @param file the file to be read
282     * @throws IOException if an I/O error occurs.
283     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
284     */
285    @Deprecated
286    public ReversedLinesFileReader(final File file) throws IOException {
287        this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset());
288    }
289
290    /**
291     * Constructs a ReversedLinesFileReader with default block size of 4KB and the
292     * specified encoding.
293     *
294     * @param file    the file to be read
295     * @param charset the charset to use, null uses the default Charset.
296     * @throws IOException if an I/O error occurs.
297     * @since 2.5
298     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
299     */
300    @Deprecated
301    public ReversedLinesFileReader(final File file, final Charset charset) throws IOException {
302        this(file.toPath(), charset);
303    }
304
305    /**
306     * Constructs a ReversedLinesFileReader with the given block size and encoding.
307     *
308     * @param file      the file to be read
309     * @param blockSize size of the internal buffer (for ideal performance this
310     *                  should match with the block size of the underlying file
311     *                  system).
312     * @param charset  the encoding of the file, null uses the default Charset.
313     * @throws IOException if an I/O error occurs.
314     * @since 2.3
315     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
316     */
317    @Deprecated
318    public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException {
319        this(file.toPath(), blockSize, charset);
320    }
321
322    /**
323     * Constructs a ReversedLinesFileReader with the given block size and encoding.
324     *
325     * @param file      the file to be read
326     * @param blockSize size of the internal buffer (for ideal performance this
327     *                  should match with the block size of the underlying file
328     *                  system).
329     * @param charsetName  the encoding of the file, null uses the default Charset.
330     * @throws IOException                                  if an I/O error occurs
331     * @throws java.nio.charset.UnsupportedCharsetException thrown instead of
332     *                                                      {@link UnsupportedEncodingException}
333     *                                                      in version 2.2 if the
334     *                                                      encoding is not
335     *                                                      supported.
336     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
337     */
338    @Deprecated
339    public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException {
340        this(file.toPath(), blockSize, charsetName);
341    }
342
343    /**
344     * Constructs a ReversedLinesFileReader with default block size of 4KB and the
345     * specified encoding.
346     *
347     * @param file    the file to be read
348     * @param charset the charset to use, null uses the default Charset.
349     * @throws IOException if an I/O error occurs.
350     * @since 2.7
351     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
352     */
353    @Deprecated
354    public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException {
355        this(file, DEFAULT_BLOCK_SIZE, charset);
356    }
357
358    /**
359     * Constructs a ReversedLinesFileReader with the given block size and encoding.
360     *
361     * @param file      the file to be read
362     * @param blockSize size of the internal buffer (for ideal performance this
363     *                  should match with the block size of the underlying file
364     *                  system).
365     * @param charset  the encoding of the file, null uses the default Charset.
366     * @throws IOException if an I/O error occurs.
367     * @since 2.7
368     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
369     */
370    @Deprecated
371    public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException {
372        this.blockSize = blockSize;
373        this.charset = Charsets.toCharset(charset);
374
375        // --- check & prepare encoding ---
376        final CharsetEncoder charsetEncoder = this.charset.newEncoder();
377        final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
378        if (maxBytesPerChar == 1f || this.charset == StandardCharsets.UTF_8) {
379            // all one byte encodings are no problem
380            byteDecrement = 1;
381        } else if (this.charset == Charset.forName("Shift_JIS") || // Same as for UTF-8
382        // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
383                this.charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese)
384                this.charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean)
385                this.charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese)
386                this.charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese)
387            byteDecrement = 1;
388        } else if (this.charset == StandardCharsets.UTF_16BE || this.charset == StandardCharsets.UTF_16LE) {
389            // UTF-16 new line sequences are not allowed as second tuple of four byte
390            // sequences,
391            // however byte order has to be specified
392            byteDecrement = 2;
393        } else if (this.charset == StandardCharsets.UTF_16) {
394            throw new UnsupportedEncodingException(
395                    "For UTF-16, you need to specify the byte order (use UTF-16BE or " + "UTF-16LE)");
396        } else {
397            throw new UnsupportedEncodingException(
398                    "Encoding " + charset + " is not supported yet (feel free to " + "submit a patch)");
399        }
400
401        // NOTE: The new line sequences are matched in the order given, so it is
402        // important that \r\n is BEFORE \n
403        this.newLineSequences = new byte[][] {
404            StandardLineSeparator.CRLF.getBytes(this.charset),
405            StandardLineSeparator.LF.getBytes(this.charset),
406            StandardLineSeparator.CR.getBytes(this.charset)
407        };
408
409        this.avoidNewlineSplitBufferSize = newLineSequences[0].length;
410
411        // Open file
412        this.channel = Files.newByteChannel(file, StandardOpenOption.READ);
413        this.totalByteLength = channel.size();
414        int lastBlockLength = (int) (this.totalByteLength % blockSize);
415        if (lastBlockLength > 0) {
416            this.totalBlockCount = this.totalByteLength / blockSize + 1;
417        } else {
418            this.totalBlockCount = this.totalByteLength / blockSize;
419            if (this.totalByteLength > 0) {
420                lastBlockLength = blockSize;
421            }
422        }
423        this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
424
425    }
426
427    /**
428     * Constructs a ReversedLinesFileReader with the given block size and encoding.
429     *
430     * @param file        the file to be read
431     * @param blockSize   size of the internal buffer (for ideal performance this
432     *                    should match with the block size of the underlying file
433     *                    system).
434     * @param charsetName the encoding of the file, null uses the default Charset.
435     * @throws IOException                                  if an I/O error occurs
436     * @throws java.nio.charset.UnsupportedCharsetException thrown instead of
437     *                                                      {@link UnsupportedEncodingException}
438     *                                                      in version 2.2 if the
439     *                                                      encoding is not
440     *                                                      supported.
441     * @since 2.7
442     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
443     */
444    @Deprecated
445    public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException {
446        this(file, blockSize, Charsets.toCharset(charsetName));
447    }
448
449    /**
450     * Closes underlying resources.
451     *
452     * @throws IOException if an I/O error occurs.
453     */
454    @Override
455    public void close() throws IOException {
456        channel.close();
457    }
458
459    /**
460     * Returns the lines of the file from bottom to top.
461     *
462     * @return the next line or null if the start of the file is reached
463     * @throws IOException if an I/O error occurs.
464     */
465    public String readLine() throws IOException {
466
467        String line = currentFilePart.readLine();
468        while (line == null) {
469            currentFilePart = currentFilePart.rollOver();
470            if (currentFilePart == null) {
471                // no more FileParts: we're done, leave line set to null
472                break;
473            }
474            line = currentFilePart.readLine();
475        }
476
477        // aligned behavior with BufferedReader that doesn't return a last, empty line
478        if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) {
479            trailingNewlineOfFileSkipped = true;
480            line = readLine();
481        }
482
483        return line;
484    }
485
486    /**
487     * Returns {@code lineCount} lines of the file from bottom to top.
488     * <p>
489     * If there are less than {@code lineCount} lines in the file, then that's what
490     * you get.
491     * </p>
492     * <p>
493     * Note: You can easily flip the result with {@link Collections#reverse(List)}.
494     * </p>
495     *
496     * @param lineCount How many lines to read.
497     * @return A new list
498     * @throws IOException if an I/O error occurs.
499     * @since 2.8.0
500     */
501    public List<String> readLines(final int lineCount) throws IOException {
502        if (lineCount < 0) {
503            throw new IllegalArgumentException("lineCount < 0");
504        }
505        final ArrayList<String> arrayList = new ArrayList<>(lineCount);
506        for (int i = 0; i < lineCount; i++) {
507            final String line = readLine();
508            if (line == null) {
509                return arrayList;
510            }
511            arrayList.add(line);
512        }
513        return arrayList;
514    }
515
516    /**
517     * Returns the last {@code lineCount} lines of the file.
518     * <p>
519     * If there are less than {@code lineCount} lines in the file, then that's what
520     * you get.
521     * </p>
522     *
523     * @param lineCount How many lines to read.
524     * @return A String.
525     * @throws IOException if an I/O error occurs.
526     * @since 2.8.0
527     */
528    public String toString(final int lineCount) throws IOException {
529        final List<String> lines = readLines(lineCount);
530        Collections.reverse(lines);
531        return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator();
532    }
533
534}