package artificialFastqGenerator;

import java.util.Random;

/**
 * ArtificialFastqGenerationUtils consists exclusively of static utility methods that are used in
 * artificial fastq generation.
 * 
 * Copyright (C) 2012 The Institute of Cancer Research (ICR).
 * 
 * This file is part of ArtificialFastqGenerator v1.0.0.
 * 
 * ArtificialFastqGenerator is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or (at your
 * option) any later version.
 * 
 * This program is distributed in the hope that it will be useful but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 * 
 * You should have received a copy of the GNU Public License along with this
 * program. If not, see <http://www.gnu.org/licenses/>
 * 
 * Authour's contact email: Matthew.Frampton@icr.ac.uk
 */

public class ArtificialFastqGenerationUtils {

	public static Random randNumGenerator = new Random();
	
	/**
	 * Check if a genotype (represented as a char) is A/C/G/T.
	 * 
	 * @param genotype - the genotype whose type we are checking.
	 * @return isACGT - true if the genotype is A/C/G/T, else false.
	 */
	
	public static boolean isACGT(char genotype) {
		
		boolean isACGT = false;
		if (genotype == 'A' || genotype == 'T' || isGC(genotype)) {
			isACGT = true;
		}
		return isACGT;
	}
	
	/**
	 * Check if a genotype (represented as a char) is G/C.
	 * 
	 * @param genotype - the genotype whose type we are checking.
	 * @return isGC - true if the genotype is G/C, else false.
	 */
	
	public static boolean isGC(char genotype) {
		
		boolean isGC = false;
		if (genotype == 'C' || genotype == 'G') {
			isGC = true;
		}
		return isGC;
		
	}
	
	/**
	 * Return a char array containing the alternate genotypes.
	 * 
	 * @param genotype - genotype for which we want the alternatives.
	 * @return alternateGenotypes - the alternative genotypes.
	 */
	
	public static char[] getAlternateGenotypes(char genotype) {
		
		char[] alternateGenotypes = new char[3];
		int i = 0;
		if (genotype != 'A') {
			alternateGenotypes[i] = 'A';
			i++;}
		if (genotype != 'C') {
			alternateGenotypes[i] = 'C';
			i++;}
		if (genotype != 'G') {
			alternateGenotypes[i] = 'G';
			i++;}
		if (genotype != 'T') {
			alternateGenotypes[i] = 'T';
		}
		return alternateGenotypes;
		
	}
	
	/**
	 * Generates a pseudo-random number from a normal distribution, but with a minimum and maximum value.
	 * 
	 * @param mean - the population mean.
	 * @param standardDeviation - the population standard deviation.
	 * @param minimum - the minimum possible value.
	 * @param maximum - the maximum possible value.
	 * @return randomIntWithinRange - the integer from the bounded normal distribution.
	 */
	
	public static int getRandomIntWithinRange(int mean, double standardDeviation, int minimum, int maximum) {
		
		int randomIntWithinRange = (int) Math.round(randNumGenerator.nextGaussian()*standardDeviation+mean);
		if (randomIntWithinRange > maximum) {
			randomIntWithinRange = maximum;
		} else if (randomIntWithinRange < minimum) {
			randomIntWithinRange = minimum;
		}
		
		return randomIntWithinRange;
		//DEBUGGING
		//return mean;
		
	}
	
	/**
	 * This method calculates a region's target coverage from its GC content. It uses a normal distribution to model target
	 * coverage levels for a given GC content. Hence it first calculates the mean and standard deviation of this
	 * distribution, and then samples from it. It calculates the mean via a Gaussian function f(x) = ae^-((x-b)^2/2c^2) where
	 * a is the height of the curve's peak (coverage mean peak), b is the position of the centre of the peak (coverage mean
	 * peak GC content), and c controls the width of the bell (coverage mean GC content spread). It calculates the standard
	 * deviation as a multiple of the mean.  
	 * 
	 * @param GCcontent
	 * @param coverageMeanPeak
	 * @param coverageMeanPeakGCcontent
	 * @param coverageMeanGCcontentSpread
	 * @param coverageSDdividedByMean
	 * @return targetCoverage
	 */
	
	public static int getTargetCoverageViaGaussFuncOfGCcont(double GCcontent, double coverageMeanPeak, 
			double coverageMeanPeakGCcontent, double coverageMeanGCcontentSpread, double coverageSDdividedByMean) {
		
		double coverageMean = coverageMeanPeak * Math.exp(-1.0*Math.pow(GCcontent-coverageMeanPeakGCcontent,2.0)/(2.0*Math.pow(coverageMeanGCcontentSpread,2.0)));
		double coverageSD = coverageMean * coverageSDdividedByMean;
		int targetCoverage = (int) Math.round(randNumGenerator.nextGaussian()*coverageSD+coverageMean);
		//System.out.println("GCcontent: " + GCcontent + " coverageMean: " + coverageMean + " coverageSD: " + coverageSD + " targetCoverage: " + targetCoverage);
		return targetCoverage;
	}
	
	/**
	 * Generates a random double between 0 and 1.
	 * 
	 * @return randomNumberBtwn0And1 - a pseudo-random double between 0 and 1.
	 */
	
	public static double generateRandomDoubleBetween0And1() {
		
		//Generate a random number from 1 to 10000.
		int maxInt = 10000;
		double randomNumberBtwn0And1 = (double) randNumGenerator.nextInt(maxInt); 
		randomNumberBtwn0And1 = randomNumberBtwn0And1/((double) maxInt);
		return randomNumberBtwn0And1;
		
	}
	
	/**
	 * Generate a stack trace string for an exception object.
	 * 
	 * @param e - the exception object.
	 * @return stackTraceString - the stack trace string for e.
	 */
	
	public static String getStackTraceString(Exception e) {
		String stackTraceString = e.getClass().getName() + ": " + e.getMessage() + "\n";
		StackTraceElement[] stackTraceElements = e.getStackTrace();
		for (int i=0; i<stackTraceElements.length; i++) {
			stackTraceString = stackTraceString + "\tat " + stackTraceElements[i] + "\n";
		}
		return stackTraceString;
	}
	
	/**
	 *
	 * Set the sequence field of Read and BabyRead objects to null and then also set the object to null. This is for helping
	 * Java to garbage collect redundant Read and BabyRead objects.
	 * 
	 * @param redundantRead
	 */
	
	public static <T extends AbstractRead> void setRedundantReadToNull(T redundantRead) {
		redundantRead.setSequence(null);
		redundantRead = null;
	}
	
	@Deprecated
	/**
	 * Coverage calculated as function of GCContent based on numbers in "Enrichment of sequencing of targets from the human
	 * genome by solution hybridization" by Tewhey et al: coverage peaks at about 1.3 when GC content is 0.45, and is about
	 * 0.1 when GC content is 0. Hence we want to calculate mean coverage based on distance from 0.45. Define 2 simultaneous
	 * equations: 0A + B = 1.3 & 0.45A + B = 0.1, and solve to give A = -8/3 and B = 1.3 . Hence meanCoverage = -8/3 * 
	 * |.45-GCContent| + 1.3, then plug this into nextGaussian to get the coverage for this region. The standard deviation
	 * is calculated as a multiple of the mean.
	 * 
	 * @param GcContent - the GC content for this capture probe region.
	 * @return targetCoverage - the target coverage for nucleobases in the capture probe region.
	 */
	
	public static int calculateTargetCoverageFromGCcontent(double GCcontent) {
	
		double coverageMean = -8.0/3.0 * Math.abs(.45-GCcontent) + 1.3;
		double coverageSD = coverageMean * Main.coverageSD;
		int targetCoverage = (int) Math.round(randNumGenerator.nextGaussian()*coverageSD+coverageMean);
		
		return targetCoverage;
		//DEBUGGING
		//return Main.targetAverageCoverage;
		
	}
	
}