package artificialFastqGenerator;

import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Instances of this class calculate regional and overall coverage statistics for nucleobases.
 * 
 * Copyright (C) 2012 The Institute of Cancer Research (ICR).
 *
 * This file is part of ArtificialFastqGenerator v1.0.0.
 * 
 * ArtificialFastqGenerator is free software: you can redistribute it and/or modify it under the terms of the GNU General
 * Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any
 * later version.
 * 
 * This program is distributed in the hope that it will be useful but WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU Public License along with this program. If not, see
 * <http://www.gnu.org/licenses/>
 * 
 * Authour's contact email: Matthew.Frampton@icr.ac.uk
 */

public class CoverageStatisticsCalculator {

	//Fields for summary stats
	private double regionTotalCov, regionACGTTotalCov, overallTotalCov, overallACGTTotalCov;
	private long regionNumBasesProcessed;
	private int regionMinCov, regionMaxCov;
	private int regionNumACGTsProcessed, regionACGTMinCov, regionACGTMaxCov;
	private long overallNumBasesProcessed;
	private int overallMinCov, overallMaxCov;
	private int overallNumACGTsProcessed, overallACGTMinCov, overallACGTMaxCov;
	private static final int multipleOfMeanForInitMinCov = 1000000;
	private boolean overallStatsUpdatedFromRegionStatsAtLeastOnce;
	private Logger logger;
	
	/**
	 * Initialise a new CoverageStatisticsCalculator object.
	 */
	
	public CoverageStatisticsCalculator() {
		
		regionTotalCov = 0.0;
		regionACGTTotalCov = 0.0;
		overallTotalCov = 0.0;
		overallACGTTotalCov = 0.0;
		
		regionNumBasesProcessed = 0;
		regionMinCov = (int) Main.coverageMeanPeak*multipleOfMeanForInitMinCov;
		regionMaxCov = 0;
		
		regionNumACGTsProcessed = 0;
		regionACGTMinCov = (int) Main.coverageMeanPeak*multipleOfMeanForInitMinCov;
		regionACGTMaxCov = 0;
		
		overallNumBasesProcessed = 0;
		overallMinCov = (int) Main.coverageMeanPeak*multipleOfMeanForInitMinCov;
		overallMaxCov = 0;
		
		overallNumACGTsProcessed = 0;
		overallACGTMinCov = (int) Main.coverageMeanPeak*multipleOfMeanForInitMinCov;
		overallACGTMaxCov = 0;	
		
		overallStatsUpdatedFromRegionStatsAtLeastOnce = false;
		
		logger = Main.logger;
		
	}
	
	/**
	 * Log the coverage statistics header.
	 */
	
	public void logCoverageStatsHeader() {

		logger.log(Level.INFO,"ALL_SO_FAR_NUM,ALL_AV_COV,ALL_MIN_COV,ALL_MAX_COV,ACGT_NUM,ACGT_AV_COV,ACGT_MIN_COV," +
				"ACGT_MAX_COV");
		
	}
	
	/**
	 * Update the region coverage statistics from the coverage statistics for 1
	 * nucleobase in that region.
	 * 
	 * @param nucleobase - nucleobase from which to update region stats.
	 */
	
	public void updateRegionStatsFrom1Nucleobase(Nucleobase nucleobase) {
		
		//Stats for all bases i.e. including Ns:
		int numReads = nucleobase.getCoverage();
		if (numReads < regionMinCov) {regionMinCov = numReads;}
		else if (numReads > regionMaxCov) {regionMaxCov = numReads;}
		regionTotalCov = regionTotalCov + numReads;
		regionNumBasesProcessed = regionNumBasesProcessed + 1;
		//Stats for A/C/G/Ts:
		if (ArtificialFastqGenerationUtils.isACGT(nucleobase.getGenotype())) {
			if (numReads < regionACGTMinCov) {regionACGTMinCov = numReads;}
			if (numReads > regionACGTMaxCov) {regionACGTMaxCov = numReads;}
			//else if (numReads > regionACGTMaxCov) {regionACGTMaxCov = numReads;}
			regionACGTTotalCov = regionACGTTotalCov + numReads;
			regionNumACGTsProcessed = regionNumACGTsProcessed + 1;
		}
		
	}
	
	/**
	 * Update the overall coverage statistics from region coverage statistics.
	 */
	
	private void updateOverallStatsFromRegionStats() {
		
		overallStatsUpdatedFromRegionStatsAtLeastOnce = true;
		//Update overall stats for all bases i.e. including Ns:
		overallNumBasesProcessed = overallNumBasesProcessed + regionNumBasesProcessed;
		overallTotalCov = overallTotalCov + regionTotalCov;
		if (regionMinCov < overallMinCov) {overallMinCov = regionMinCov;}
		if (regionMaxCov > overallMaxCov) {overallMaxCov = regionMaxCov;}
		//Update overall stats for A/C/G/Ts:
		overallNumACGTsProcessed = overallNumACGTsProcessed + regionNumACGTsProcessed;
		overallACGTTotalCov = overallACGTTotalCov + regionACGTTotalCov;
		if (regionACGTMinCov < overallACGTMinCov) {overallACGTMinCov = regionACGTMinCov;}
		if (regionACGTMaxCov > overallACGTMaxCov) {overallACGTMaxCov = regionACGTMaxCov;}
		
	}

	/**
	 * Log the region coverage statistics.
	 */
	
	public void logRegionStats() {
	
		updateOverallStatsFromRegionStats();
		
		double regionAverageCoverage = regionTotalCov/regionNumBasesProcessed;
		double regionACGTAverageCoverage = regionACGTTotalCov/regionNumACGTsProcessed;
		if (regionMaxCov == 0) {
			regionMinCov = 0;
			regionAverageCoverage = 0;}
		if (regionACGTMaxCov == 0) {
			regionACGTMinCov = 0;
			regionACGTAverageCoverage = 0;}
			
		logger.log(Level.INFO, overallNumBasesProcessed + "," + regionAverageCoverage + "," + regionMinCov + "," + 
		regionMaxCov + "," + regionNumACGTsProcessed + "," + regionACGTAverageCoverage + "," + regionACGTMinCov + "," +
				regionACGTMaxCov);
		resetRegionStats();
		
	}
	
	/**
	 * Reset the region coverage statistics.
	 */
	
	private void resetRegionStats() {
		
		regionTotalCov = 0.0;
		regionNumBasesProcessed = 0;
		regionMinCov = (int) Main.coverageMeanPeak*multipleOfMeanForInitMinCov;
		regionMaxCov = 0;
		regionACGTTotalCov = 0.0;
		regionNumACGTsProcessed = 0;
		regionACGTMinCov = (int) Main.coverageMeanPeak*multipleOfMeanForInitMinCov;
		regionACGTMaxCov = 0;
		
	}
	
	/**
	 * Log the overall coverage and error statistics.
	 */
	
	
	public void logOverallStats() {
		
		if (overallStatsUpdatedFromRegionStatsAtLeastOnce == false) {
			updateOverallStatsFromRegionStats();
		}
		
		logger.log(Level.INFO,"");
		logger.log(Level.INFO,"OVERALL STATISTICS:");
		logger.log(Level.INFO,"# nucleobases processed=" + overallNumBasesProcessed);
		logger.log(Level.INFO,"Average coverage=" + overallTotalCov/overallNumBasesProcessed);
		logger.log(Level.INFO,"Minimum coverage=" + overallMinCov);
		logger.log(Level.INFO,"Maximum coverage=" + overallMaxCov);
		logger.log(Level.INFO,"# ACGTs processed=" + overallNumACGTsProcessed);
		if (overallNumACGTsProcessed != 0.0) {
			logger.log(Level.INFO,"ACGT average coverage=" + overallACGTTotalCov/overallNumACGTsProcessed);
			logger.log(Level.INFO,"ACGT minimum coverage=" + overallACGTMinCov);
			logger.log(Level.INFO,"ACGT maximum coverage=" + overallACGTMaxCov);
		}
		
	}
	
}
