File: simulateWithDependence.cpp

package info (click to toggle)
fastml 3.11-4
links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 5,772 kB
sloc: cpp: 48,522; perl: 3,588; ansic: 819; makefile: 386; python: 83; sh: 55
file content (142 lines) | stat: -rw-r--r-- 4,698 bytes
parent folder | download | duplicates (5)
#include "simulateWithDependence.h"

/*
This code receives a tree file and simulates sequences accordingly using:
simulateTree st1(treeIn, *_sp, alph);
st1.generate_seq(num_pos_with_same_k);
which were written by another beloved group member.

Its feature is to simulate co-evolution between pairs of positions of binary data. Basic logic:
1. the basic concept is to use the regular independent model with 4 states to code a dependent model with 2 states.
   thus, all possible pairs of dada: 00, 01, 10, 11 are coded into A, C, G, T
2. dependency between possitions can be described as a tendency to have the same character (that is: 00 or 11). with this model we can accelerate
   the rate of evolution when an "unstable" state occures (rate increases when 01 (C) or 10 (G))

For more details, please see http://copap.tau.ac.il/benchmark.php and
Ofir Cohen, Haim Ashkenazy, Eli Levy Karin, David Burstein and Tal Pupko (2013)
CoPAP: Co-evolution of Presence-Absence Patterns.
Nucleic Acids Research 2013; doi: 10.1093/nar/gkt471

Eli Levy Karin, 2013

----------------- usage example: ------------------
#include "simulateWithDependence.h"

using namespace sim_with_dep;

int main(int argc, char** argv) {
	
	string treeFile = argv[1];

	double exit_code;

	exit_code = simulate_with_dependence (treeFile, 0.5, 14, 500, 500, 0, 1, 0.893195, 1, 4);
	
	return 0;
}
-------------- end usage example: ---------------

*/
namespace sim_with_dep 
{

	double simulate_with_dependence (string treeFile, double PI_1, double init_k, int total_positions, int num_pos_with_same_k, double k_increase, int is_gamma, double alpha, double beta, int num_cat)
	{

		//read Newick format tree
		tree treeIn(treeFile);
	
		//four states alphabet A C G T (will later be rplaced to 00,01,10,11)
		alphabet* alph = new nucleotide;

		sequenceContainer SC_all; //this will contain all positions
	
		//parameters:
		double PI_0 = 1-PI_1;
		double k = init_k; //will be increased with each iteration

		//parameters:
		int jump_size = total_positions / num_pos_with_same_k;

		for(int i=0; i<jump_size; i++)
		{
			Vdouble freqs; //stationary probabilities PI_00, PI_01, PI_10, PI_11
			double TOTAL = k*PI_1*PI_1 + 2*PI_0*PI_1 + k*PI_0*PI_0;
			freqs.push_back(k*PI_0*PI_0 / TOTAL); //PI_00 = k*PI_0*PI_0 / TOTAL
			freqs.push_back(PI_0*PI_1 / TOTAL); //PI_01 = PI_0*PI_1 / TOTAL
			freqs.push_back(PI_0*PI_1 / TOTAL); //PI_10 = PI_0*PI_1 / TOTAL
			freqs.push_back(k*PI_1*PI_1 / TOTAL); //PI_11 = k*PI_1*PI_1 / TOTAL
	
			//Q matrix (partial values - the rest are calculated by gtrModel using freqs and these values)
			MDOUBLE a2c = PI_1; // --> c2a = freqs[a]*a2c/freqs[c] --> c2a = ((k*PI_0*PI_0 / TOTAL)*PI_1)/(PI_0*PI_1 / TOTAL) = k*PI_0
			MDOUBLE a2g = PI_1;
			MDOUBLE a2t = 0;
			MDOUBLE c2g = 0;
			MDOUBLE c2t = k*PI_1;
			MDOUBLE g2t = k*PI_1;

			//starting the evolutionary model
			distribution *currDist = NULL;
			if(is_gamma == 1)
			{
				currDist = new generalGammaDistribution(alpha,beta,num_cat); // ---> in the future we might want to turn these into param
			}
			else
			{
				currDist = new uniDistribution; // no among site rate variation
			}
		
			replacementModel *probMod = NULL;
			pijAccelerator *pijAcc = NULL;

			probMod = new gtrModel(freqs,a2c,a2g,a2t,c2g,c2t,g2t);
			pijAcc = new trivialAccelerator(probMod);
			stochasticProcess* _sp = new stochasticProcess(currDist, pijAcc);

			//simulate:
			simulateTree st1(treeIn, *_sp, alph);
			st1.generate_seq(num_pos_with_same_k); //simulate num_pos_with_same_k positions with the current k

			if(i == 0)
			{
				SC_all = st1.toSeqDataWithoutInternalNodes(); //first time
			}
			else
			{
				sequenceContainer SC = st1.toSeqDataWithoutInternalNodes(); //concatenate new positions to the ones you have
				SC_all.concatenate(SC);
			}
		
			delete currDist;
			delete probMod;
			delete pijAcc;
			delete _sp;

			k = k + k_increase; //k = 1 , 1.05 , 1.1 , ... , 5.5
		}

		//prepare out file name:
		std::stringstream sstm;
		if(is_gamma == 1)
		{
			sstm << treeFile << ".gammaRateNoInv.PI_1=" << PI_1 << ".init_k=" << init_k << ".k_group_size=" << num_pos_with_same_k << ".k_increase=" << k_increase << ".fas";
		}
		else
		{
			sstm << treeFile << ".NoRate.PI_1=" << PI_1 << ".init_k=" << init_k << ".k_group_size=" << num_pos_with_same_k << ".k_increase=" << k_increase << ".fas";
		}
		std::string seqOutputFile = sstm.str();

		//write out:
		ofstream seq_sim(seqOutputFile.c_str());
		fastaFormat::write(seq_sim,SC_all);
		seq_sim.close();
	
	
		delete alph;
		
		return 0;
	
	}

};