/*
 * DSI utilities
 *
 * Copyright (C) 2020-2023 Sebastiano Vigna
 *
 * This program and the accompanying materials are made available under the
 * terms of the GNU Lesser General Public License v2.1 or later,
 * which is available at
 * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
 * or the Apache Software License 2.0, which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
 */

package it.unimi.dsi.io;

import java.io.Closeable;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.reflect.Constructor;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.zip.GZIPInputStream;

import it.unimi.dsi.fastutil.BigList;
import it.unimi.dsi.fastutil.Size64;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import it.unimi.dsi.fastutil.objects.ObjectBigArrayBigList;
import it.unimi.dsi.fastutil.objects.ObjectBigList;
import it.unimi.dsi.fastutil.objects.ObjectIterables;
import it.unimi.dsi.fastutil.objects.ObjectList;
import it.unimi.dsi.lang.MutableString;

/**
 * A wrapper exhibiting the lines of a file as an {@link Iterable} of {@linkplain MutableString
 * mutable strings}.
 *
 * <P>
 * <strong>Warning</strong>: the lines returned by iterators generated by instances of this class
 * <em>are not cacheable</em>. The returned value is a {@link it.unimi.dsi.lang.MutableString
 * MutableString} instance that is reused at each call, and that is <em>modified by a call to
 * {@link java.util.Iterator#hasNext() hasNext()}</em>. Thus, for instance,
 *
 * <pre>
 * ObjectIterators.unwrap(fileLinesMutableStringIterable.iterator());
 * </pre>
 *
 * will not give the expected results. Use {@link #allLines()} or {@link #allLinesBig()} for that
 * purpose.
 *
 * <P>
 * The result of a call to {@link #iterator()} can be used to scan the file; each call will open an
 * independent input stream. The returned iterator type
 * ({@link it.unimi.dsi.io.FileLinesMutableStringIterable.FileLinesIterator FileLinesIterator}) is
 * {@link java.io.Closeable}, and should be closed after usage. Exhausted iterators, however, will
 * be closed automagically.
 *
 * <p>
 * Using a suitable {@linkplain #FileLinesMutableStringIterable(String, long, Charset, Class)
 * constructor} it is possible to specify a decompression class, which must extend
 * {@link InputStream} and provide a constructor accepting an {@link InputStream} (e.g.,
 * {@link GZIPInputStream} if the file is compressed in <code>gzip</code> format).
 *
 * <p>
 * Convenience {@linkplain #iterator(InputStream, Charset, Class) static methods} make it possible
 * to build on the fly an iterator over an input stream using the same conventions.
 *
 * <p>
 * This class implements {@link #size64()}, which will return the number of lines of the file,
 * computed with a full scan at the first invocation. However, it is also possible to specify at
 * construction time the number of lines in the file to skip the first scan. It is the
 * responsibility of the caller to ensure that the specified size and the actual number of lines in
 * the file match.
 *
 * @author Sebastiano Vigna
 * @since 2.6.17
 */
public class FileLinesMutableStringIterable implements Iterable<MutableString>, Size64 {
	/** The filename upon which this file-lines iterable is based. */
	private final String filename;
	/** The encoding of {@link #filename}. */
	private final Charset encoding;
	/**
	 * A constructor for a stream decompressor for this iterable, or {@code null} for no compression.
	 */
	private final Constructor<? extends InputStream> decompressor;
	/** The cached size of this iterable. */
	private long size = -1;

	/**
	 * Creates a file-lines iterable for the specified filename using the platform encoding.
	 *
	 * @param filename a filename.
	 */
	public FileLinesMutableStringIterable(final String filename) {
		this(filename, -1);
	}

	/**
	 * Creates a file-lines iterable for the specified filename and size using the platform encoding.
	 *
	 * @param filename a filename.
	 * @param size the number of lines in the file.
	 */
	public FileLinesMutableStringIterable(final String filename, final long size) {
		this(filename, size, Charset.defaultCharset());
	}

	/**
	 * Creates a file-lines iterable for the specified filename using the specified encoding.
	 *
	 * @param filename a filename.
	 * @param encoding an encoding.
	 */
	public FileLinesMutableStringIterable(final String filename, final Charset encoding) {
		this(filename, -1, encoding);
	}

	/**
	 * Creates a file-lines iterable for the specified filename and size using the specified encoding.
	 *
	 * @param filename a filename.
	 * @param size the number of lines in the file.
	 * @param encoding an encoding.
	 */
	public FileLinesMutableStringIterable(final String filename, final long size, final Charset encoding) {
		this.filename = filename;
		this.encoding = encoding;
		this.decompressor = null;
		this.size = size;
	}

	/**
	 * Creates a file-lines iterable for the specified filename using the specified encoding.
	 *
	 * @param filename a filename.
	 * @param encoding an encoding.
	 */
	public FileLinesMutableStringIterable(final String filename, final String encoding) {
		this(filename, -1, encoding);
	}

	/**
	 * Creates a file-lines iterable for the specified filename and size using the specified encoding.
	 *
	 * @param filename a filename.
	 * @param size the number of lines in the file.
	 * @param encoding an encoding.
	 */
	public FileLinesMutableStringIterable(final String filename, final long size, final String encoding) {
		this.filename = filename;
		this.encoding = Charset.forName(encoding);
		this.decompressor = null;
		this.size = size;
	}

	/**
	 * Creates a file-lines iterable for the specified filename using the platform encoding, optionally
	 * assuming that the file is compressed.
	 *
	 * @param filename a filename.
	 * @param decompressor a class extending {@link InputStream} that will be used as a decompressor, or
	 *            {@code null} for no decompression.
	 */
	public FileLinesMutableStringIterable(final String filename, final Class<? extends InputStream> decompressor) throws NoSuchMethodException, SecurityException {
		this(filename, -1, Charset.defaultCharset(), decompressor);
	}

	/**
	 * Creates a file-lines iterable for the specified filename and size using the platform encoding,
	 * optionally assuming that the file is compressed.
	 *
	 * @param filename a filename.
	 * @param size the number of lines in the file.
	 * @param decompressor a class extending {@link InputStream} that will be used as a decompressor, or
	 *            {@code null} for no decompression.
	 */
	public FileLinesMutableStringIterable(final String filename, final long size, final Class<? extends InputStream> decompressor) throws NoSuchMethodException, SecurityException {
		this(filename, size, Charset.defaultCharset(), decompressor);
	}

	/**
	 * Creates a file-lines iterable for the specified filename with the specified encoding, optionally
	 * assuming that the file is compressed.
	 *
	 * @param filename a filename.
	 * @param encoding an encoding.
	 * @param decompressor a class extending {@link InputStream} that will be used as a decompressor, or
	 *            {@code null} for no decompression.
	 */
	public FileLinesMutableStringIterable(final String filename, final Charset encoding, final Class<? extends InputStream> decompressor) throws NoSuchMethodException, SecurityException {
		this(filename, -1, encoding, decompressor);
	}

	/**
	 * Creates a file-lines iterable for the specified filename with the specified encoding, optionally
	 * assuming that the file is compressed.
	 *
	 * @param filename a filename.
	 * @param encoding an encoding.
	 * @param decompressor a class extending {@link InputStream} that will be used as a decompressor, or
	 *            {@code null} for no decompression.
	 */
	public FileLinesMutableStringIterable(final String filename, final String encoding, final Class<? extends InputStream> decompressor) throws NoSuchMethodException, SecurityException {
		this(filename, -1, Charset.forName(encoding), decompressor);
	}

	/**
	 * Creates a file-lines iterable for the specified filename and size with the specified encoding,
	 * optionally assuming that the file is compressed.
	 *
	 * @param filename a filename.
	 * @param size the number of lines in the file.
	 * @param encoding an encoding.
	 * @param decompressor a class extending {@link InputStream} that will be used as a decompressor, or
	 *            {@code null} for no decompression.
	 */
	public FileLinesMutableStringIterable(final String filename, final long size, final Charset encoding, final Class<? extends InputStream> decompressor) throws NoSuchMethodException, SecurityException {
		this.filename = filename;
		this.encoding = encoding;
		this.decompressor = decompressor != null ? decompressor.getConstructor(InputStream.class) : null;
		this.size = size;
	}

	/**
	 * An iterator over the lines of a {@link FileLinesMutableStringIterable}.
	 *
	 * <p>
	 * Instances of this class open an {@linkplain java.io.InputStream input stream}, and thus should be
	 * {@linkplain Closeable#close() closed} after usage. A &ldquo;safety-net&rdquo; finaliser tries to
	 * take care of the cases in which closing an instance is impossible. An exhausted iterator,
	 * however, will be closed automagically.
	 */

	public static final class FileLinesIterator implements Iterator<MutableString>, SafelyCloseable {
		private final MutableString s = new MutableString();
		private FastBufferedReader fbr;
		private MutableString next;
		private boolean toAdvance = true;

		private FileLinesIterator(final FastBufferedReader fbr) {
			this.fbr = fbr;
		}

		@Override
		public boolean hasNext() {
			if (toAdvance) {
				try {
					next = fbr.readLine(s);
					if (next == null) close();
				} catch (final IOException e) {
					throw new RuntimeException(e);
				}
				toAdvance = false;
			}

			return next != null;
		}

		@Override
		public MutableString next() {
			if (! hasNext()) throw new NoSuchElementException();
			toAdvance = true;
			return s;
		}

		@Override
		public void remove() {
			throw new UnsupportedOperationException();
		}

		@Override
		public synchronized void close() {
			if (fbr == null) return;
			try {
				fbr.close();
			}
			catch (final IOException e) {
				throw new RuntimeException(e);
			}
			finally {
				fbr = null;
			}
		}

		@Override
		protected synchronized void finalize() throws Throwable {
			try {
				if (fbr != null) close();
			}
			finally {
				super.finalize();
			}
		}

	}

	@Override
	public FileLinesIterator iterator() {
		try {
			final InputStream inputStream = decompressor == null ? new FileInputStream(filename) : decompressor.newInstance(new FileInputStream(filename));
			return new FileLinesIterator(new FastBufferedReader(new InputStreamReader(inputStream, encoding)));
		} catch (final Exception e) {
			throw new RuntimeException(e);
		}
	}

	/**
	 * A convenience method returning a one-off {@link FileLinesIterator} reading from an input stream.
	 *
	 * @param inputStream an input stream.
	 * @param encoding an encoding.
	 * @return an iterator returning the lines contained in the provided input stream.
	 */
	public static FileLinesIterator iterator(final InputStream inputStream, final Charset encoding) {
		return iterator(inputStream, encoding, null);
	}

	/**
	 * A convenience method returning a one-off {@link FileLinesIterator} reading from an input stream.
	 *
	 * @param inputStream an input stream.
	 * @param encoding an encoding.
	 * @param decompressor a class extending {@link InputStream} that will be used as a decompressor, or
	 *            {@code null} for no decompression.
	 * @return an iterator returning the lines contained in the provided input stream.
	 */
	public static FileLinesIterator iterator(InputStream inputStream, final Charset encoding, final Class<? extends InputStream> decompressor) {
		try {
			if (decompressor != null) inputStream = decompressor.getConstructor(InputStream.class).newInstance(inputStream);
			return new FileLinesIterator(new FastBufferedReader(new InputStreamReader(inputStream, encoding)));
		} catch (final Exception e) {
			throw new RuntimeException(e);
		}
	}

	@Override
	public synchronized long size64() {
		if (size == -1) size = ObjectIterables.size(this);
		return size;
	}

	/**
	 * Returns all lines as a {@linkplain java.util.List list}.
	 *
	 * @implSpec This method iterates over the lines of the file and accumulates the resulting strings
	 *           in a standard list. Thus, it will throw an exception on files with more than
	 *           {@link Integer#MAX_VALUE} lines.
	 *
	 * @return all lines of the file wrapped by this file-lines collection.
	 * @see #allLinesBig()
	 */
	public ObjectList<String> allLines() {
		final ObjectArrayList<String> result = new ObjectArrayList<>();
		for (final MutableString s : this) result.add(s.toString());
		return result;
	}

	/**
	 * Returns all lines as a {@linkplain BigList big list}.
	 *
	 * @implSpec This method iterates over the lines of the file and accumulates the resulting strings
	 *           in a {@linkplain BigList big list}. Thus, it supports files with more than
	 *           {@link Integer#MAX_VALUE} lines.
	 *
	 * @return all lines of the file wrapped by this file-lines collection.
	 * @see #allLines()
	 */
	public ObjectBigList<String> allLinesBig() {
		final ObjectBigArrayBigList<String> result = new ObjectBigArrayBigList<>();
		for (final MutableString s : this) result.add(s.toString());
		return result;
	}
}
