1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
|
#include "simulateWithDependence.h"
/*
This code receives a tree file and simulates sequences accordingly using:
simulateTree st1(treeIn, *_sp, alph);
st1.generate_seq(num_pos_with_same_k);
which were written by another beloved group member.
Its feature is to simulate co-evolution between pairs of positions of binary data. Basic logic:
1. the basic concept is to use the regular independent model with 4 states to code a dependent model with 2 states.
thus, all possible pairs of dada: 00, 01, 10, 11 are coded into A, C, G, T
2. dependency between possitions can be described as a tendency to have the same character (that is: 00 or 11). with this model we can accelerate
the rate of evolution when an "unstable" state occures (rate increases when 01 (C) or 10 (G))
For more details, please see http://copap.tau.ac.il/benchmark.php and
Ofir Cohen, Haim Ashkenazy, Eli Levy Karin, David Burstein and Tal Pupko (2013)
CoPAP: Co-evolution of Presence-Absence Patterns.
Nucleic Acids Research 2013; doi: 10.1093/nar/gkt471
Eli Levy Karin, 2013
----------------- usage example: ------------------
#include "simulateWithDependence.h"
using namespace sim_with_dep;
int main(int argc, char** argv) {
string treeFile = argv[1];
double exit_code;
exit_code = simulate_with_dependence (treeFile, 0.5, 14, 500, 500, 0, 1, 0.893195, 1, 4);
return 0;
}
-------------- end usage example: ---------------
*/
namespace sim_with_dep
{
double simulate_with_dependence (string treeFile, double PI_1, double init_k, int total_positions, int num_pos_with_same_k, double k_increase, int is_gamma, double alpha, double beta, int num_cat)
{
//read Newick format tree
tree treeIn(treeFile);
//four states alphabet A C G T (will later be rplaced to 00,01,10,11)
alphabet* alph = new nucleotide;
sequenceContainer SC_all; //this will contain all positions
//parameters:
double PI_0 = 1-PI_1;
double k = init_k; //will be increased with each iteration
//parameters:
int jump_size = total_positions / num_pos_with_same_k;
for(int i=0; i<jump_size; i++)
{
Vdouble freqs; //stationary probabilities PI_00, PI_01, PI_10, PI_11
double TOTAL = k*PI_1*PI_1 + 2*PI_0*PI_1 + k*PI_0*PI_0;
freqs.push_back(k*PI_0*PI_0 / TOTAL); //PI_00 = k*PI_0*PI_0 / TOTAL
freqs.push_back(PI_0*PI_1 / TOTAL); //PI_01 = PI_0*PI_1 / TOTAL
freqs.push_back(PI_0*PI_1 / TOTAL); //PI_10 = PI_0*PI_1 / TOTAL
freqs.push_back(k*PI_1*PI_1 / TOTAL); //PI_11 = k*PI_1*PI_1 / TOTAL
//Q matrix (partial values - the rest are calculated by gtrModel using freqs and these values)
MDOUBLE a2c = PI_1; // --> c2a = freqs[a]*a2c/freqs[c] --> c2a = ((k*PI_0*PI_0 / TOTAL)*PI_1)/(PI_0*PI_1 / TOTAL) = k*PI_0
MDOUBLE a2g = PI_1;
MDOUBLE a2t = 0;
MDOUBLE c2g = 0;
MDOUBLE c2t = k*PI_1;
MDOUBLE g2t = k*PI_1;
//starting the evolutionary model
distribution *currDist = NULL;
if(is_gamma == 1)
{
currDist = new generalGammaDistribution(alpha,beta,num_cat); // ---> in the future we might want to turn these into param
}
else
{
currDist = new uniDistribution; // no among site rate variation
}
replacementModel *probMod = NULL;
pijAccelerator *pijAcc = NULL;
probMod = new gtrModel(freqs,a2c,a2g,a2t,c2g,c2t,g2t);
pijAcc = new trivialAccelerator(probMod);
stochasticProcess* _sp = new stochasticProcess(currDist, pijAcc);
//simulate:
simulateTree st1(treeIn, *_sp, alph);
st1.generate_seq(num_pos_with_same_k); //simulate num_pos_with_same_k positions with the current k
if(i == 0)
{
SC_all = st1.toSeqDataWithoutInternalNodes(); //first time
}
else
{
sequenceContainer SC = st1.toSeqDataWithoutInternalNodes(); //concatenate new positions to the ones you have
SC_all.concatenate(SC);
}
delete currDist;
delete probMod;
delete pijAcc;
delete _sp;
k = k + k_increase; //k = 1 , 1.05 , 1.1 , ... , 5.5
}
//prepare out file name:
std::stringstream sstm;
if(is_gamma == 1)
{
sstm << treeFile << ".gammaRateNoInv.PI_1=" << PI_1 << ".init_k=" << init_k << ".k_group_size=" << num_pos_with_same_k << ".k_increase=" << k_increase << ".fas";
}
else
{
sstm << treeFile << ".NoRate.PI_1=" << PI_1 << ".init_k=" << init_k << ".k_group_size=" << num_pos_with_same_k << ".k_increase=" << k_increase << ".fas";
}
std::string seqOutputFile = sstm.str();
//write out:
ofstream seq_sim(seqOutputFile.c_str());
fastaFormat::write(seq_sim,SC_all);
seq_sim.close();
delete alph;
return 0;
}
};
|