File: MolecularClockF81.bf

package info (click to toggle)
hyphy 2.3.14%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 30,972 kB
  • sloc: cpp: 122,584; sh: 12,310; python: 3,026; xml: 467; lisp: 341; makefile: 313; ansic: 308; sql: 119
file content (196 lines) | stat: -rw-r--r-- 3,872 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
/* This is an example HY-PHY Batch File.



   It reads in a '#' nucleotide dataset data/molclock.seq and performs

   a series of molecular clock tests on the data using the F81 model.

   

   Output is printed out as a Newick Style tree with branch lengths

   representing the number of expected substitutions per branch (which

   is the default setting for nucleotide models w/o rate variation).

   Also, the likelihood ratio statistic is evaluated and the P-values

   for the tests are reported.

   

   

   Sergei L. Kosakovsky Pond and Spencer V. Muse 

   December 1999. 

*/



/* 1. Read in the data and store the result in a DataSet variable.*/



DataSet 		nucleotideSequences = ReadDataFile ("data/molclock.seq");

   

/* 2. Filter the data, specifying that all of the data is to be used

	  and that it is to be treated as nucleotides. */

	  

DataSetFilter	filteredData = CreateFilter (nucleotideSequences,1);



/* 3. Collect observed nucleotide frequencies from the filtered data. observedFreqs will

	  store the vector of frequencies. */



HarvestFrequencies (observedFreqs, filteredData, 1, 1, 1);



F81RateMatrix = 

		{{*,mu,mu,mu}

		 {mu,*,mu,mu}

		 {mu,mu,*,mu}

		 {mu,mu,mu,*}};

		 

/*5.  Define the F81 models, by combining the substitution matrix with the vector of observed 

	  (equilibrium) frequencies. We define one for each block, since the equilibrium 

	  frequencies will differ. */



Model 	F81 = (F81RateMatrix, observedFreqs);



/*6.  Now we can define the tree for the data just read taxa. Notice that 

	  some of the internal nodes are named for later use.*/

	  

Tree  theTree = (TAAJ153,(HVRNASS,(RICRSS3,((ZMSUCS1,(OSRSS1A,(TASUCSYN1,HVSSYNMR)Internal1)),(MZESUS1,ORRSS2)Internal2))));

	  			

/*7.  Since all the likelihood function ingredients (data, tree, equilibrium frequencies)

	  have been defined we are ready to construct the likelihood function. */

	  

LikelihoodFunction  theLnLik = (filteredData, theTree);



/*8.  Maximize the likelihood function, storing parameter values in the matrix paramValues. 

	  We also store the resulting ln-lik and the number of model parameters. */



Optimize (paramValues, theLnLik);

unconstrainedLnLik = paramValues[1][0];

paramCount = paramValues[1][1];



/*9.  Print the tree with optimal branch lengths to the console. */



fprintf  (stdout, "\n 0).UNCONSTRAINED MODEL:", theLnLik);



/*10. Now we impose the molecular clock constraint on the entire tree, 

	  enforcing the clock on parameter mu.*/

	  

MolecularClock (theTree, mu);



/*11. We maximize the tree with molecular clock constraints and report the results.*/



Optimize (paramValues, theLnLik);



lnlikDelta = 2 (unconstrainedLnLik-paramValues[1][0]);

pValue = 1-CChi2 (lnlikDelta, paramCount - paramValues[1][1]);



fprintf (stdout, "\n\n1). Global Molecular Clock; the P-value is:", pValue, "\n", theLnLik);

/*12. We can now try to impose molecular clock only on a subtree of the original tree.

	  First we do that for the subtree starting at the node Internal1 */

	  

ClearConstraints (theTree);

MolecularClock (theTree.Internal1, mu);

Optimize (paramValues, theLnLik);



lnlikDelta = 2 (unconstrainedLnLik-paramValues[1][0]);

pValue = 1-CChi2 (lnlikDelta, paramCount - paramValues[1][1]);



fprintf (stdout, "\n\n2). Molecular Clock starting at Internal1; the P-value is:", pValue, "\n", theLnLik);



/*12. Secondly, we apply the clock for the subtree starting at the node Internal2, 

	  in addition to the clock imposed in Step 11. */

	  

MolecularClock (theTree.Internal2, mu);

Optimize (paramValues, theLnLik);

lnlikDelta = 2 (unconstrainedLnLik-paramValues[1][0]);

pValue = 1-CChi2 (lnlikDelta, paramCount - paramValues[1][1]);



fprintf (stdout, "\n\n3). Molecular Clock starting at Internal1 and Internal2; the P-value is:", pValue, "\n", theLnLik);