File: HKY85relrate.bf

package info (click to toggle)
hyphy 2.5.28%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 24,780 kB
  • sloc: cpp: 76,872; xml: 467; lisp: 341; python: 156; javascript: 117; sh: 106; makefile: 86; ansic: 86
file content (328 lines) | stat: -rw-r--r-- 5,741 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
/* This is an example HY-PHY Batch File.



   It reads in a PHYLIP nucleotide dataset data/3.seq and performs

   the relative rate test on the 3-taxa tree, using HKY85 model.

   

   Output is printed out as a Newick Style tree with branch lengths

   representing the number of expected substitutions per branch (which

   is the default setting for nucleotide models w/o rate variation).

   Also, the likelihood ratio statistic is evaluated and the P-value

   for the test is reported.

   

   

   Sergei L. Kosakovsky Pond and Spencer V. Muse 

   December 1999. 

*/



/* 1. Read in the data and store the result in a DataSet variable.*/



DataSet 		nucleotideSequences = ReadDataFile ("data/3.seq");

   

/* 2. Filter the data, specifying that all of the data is to be used

	  and that it is to be treated as nucleotides. */

	  

DataSetFilter	filteredData = CreateFilter (nucleotideSequences,1);



/* 3. Collect observed nucleotide frequencies from the filtered data. observedFreqs will

	  store the vector of frequencies. */



HarvestFrequencies (observedFreqs, filteredData, 1, 1, 1);



/* 4. Define the HKY substitution matrix. '*' is defined to be -(sum of off-diag row elements) */



HKY85RateMatrix  = 

		{{*,trvs,trst,trvs}

		 {trvs,*,trvs,trst}

		 {trst,trvs,*,trvs}

		 {trvs,trst,trvs,*}};

		 

/*5.  Define the HKY85 model, by combining the substitution matrix with the vector of observed (equilibrium)

	  frequencies. */

	  

Model HKY85	 = (HKY85RateMatrix , observedFreqs);



/*6.  Now we can define the simple three taxa tree.
	  Since there is only 1 three sequence tree, we turn on
	  ALLOW_SEQUENCE_MISMATCH to tell hyphy to map the first
	  sequence in the data to leaf 'a', the 2nd - to leaf 'b' 
	  and the third - leaf 'c'. */

ALLOW_SEQUENCE_MISMATCH = 1;	  

Tree	threeTaxaTree = (a,b,c);



/*7.  Since all the likelihood function ingredients (data, tree, equilibrium frequencies)

	  have been defined we are ready to construct the likelihood function. */

	  

LikelihoodFunction  theLnLik = (filteredData, threeTaxaTree);



/*8.  Maximize the likelihood function, storing parameter values in the matrix paramValues. 

	  We also store the resulting ln-lik. */



Optimize (paramValues, theLnLik);

unconstrainedLnLik = paramValues[1][0];



/*9.  Print the tree with optimal branch lengths to the console. */



fprintf  (stdout, "\n 0).UNCONSTRAINED MODEL:", theLnLik);



/*10. We now constrain the rate of evolution to be equal along the branches leading 

	  to c and b and repeat the optimization. We will impose 3 types of constraints:

	  - transition rates are equal

	  - transversion rates are equal 

	  - both rates are equal.

	  We also compute the ln-lik ratio statistic and the P-Value, using the Chi^2 dist'n 

	  with 1(or 2) degree of freedom.

*/

	  

threeTaxaTree.b.trst := threeTaxaTree.c.trst;

Optimize (paramValues, theLnLik);

	  

lnlikDelta = 2 (unconstrainedLnLik-paramValues[1][0]);

pValue = 1-CChi2 (lnlikDelta, 1);



fprintf (stdout, "\n\n1). Outgroup at taxon #1");

fprintf (stdout, "\n\t a). Transition rate test. P-Value = ", pValue,"\n", theLnLik);



ClearConstraints (threeTaxaTree.b.trst);



threeTaxaTree.b.trvs := threeTaxaTree.c.trvs;

Optimize (paramValues, theLnLik);

	  

lnlikDelta = 2 (unconstrainedLnLik-paramValues[1][0]);

pValue = 1-CChi2 (lnlikDelta, 1);



fprintf (stdout, "\n\t b). Transversion rate test. P-Value = ", pValue,"\n", theLnLik);



threeTaxaTree.b.trst := threeTaxaTree.c.trst;

Optimize (paramValues, theLnLik);

	  

lnlikDelta = 2 (unconstrainedLnLik-paramValues[1][0]);

pValue = 1-CChi2 (lnlikDelta, 2);



fprintf (stdout, "\n\t c). Both rates test. P-Value = ", pValue,"\n", theLnLik);





/*11. Clear the constraints on the tree and repeat the previous steps for other outgroups. */



/* Repeat for taxon #2 */



ClearConstraints (threeTaxaTree);



threeTaxaTree.a.trst := threeTaxaTree.c.trst;

Optimize (paramValues, theLnLik);

	  

lnlikDelta = 2 (unconstrainedLnLik-paramValues[1][0]);

pValue = 1-CChi2 (lnlikDelta, 1);



fprintf (stdout, "\n\n2). Outgroup at taxon #2");

fprintf (stdout, "\n\t a). Transition rate test. P-Value = ", pValue,"\n", theLnLik);



ClearConstraints (threeTaxaTree);



threeTaxaTree.a.trvs := threeTaxaTree.c.trvs;

Optimize (paramValues, theLnLik);

	  

lnlikDelta = 2 (unconstrainedLnLik-paramValues[1][0]);

pValue = 1-CChi2 (lnlikDelta, 1);



fprintf (stdout, "\n\t b). Transversion rate test. P-Value = ", pValue,"\n", theLnLik);



threeTaxaTree.a.trst := threeTaxaTree.c.trst;

Optimize (paramValues, theLnLik);

	  

lnlikDelta = 2 (unconstrainedLnLik-paramValues[1][0]);

pValue = 1-CChi2 (lnlikDelta, 2);



fprintf (stdout, "\n\t c). Both rates test. P-Value = ", pValue,"\n", theLnLik);



/* Repeat for taxon #3 */



ClearConstraints (threeTaxaTree);



threeTaxaTree.a.trst := threeTaxaTree.b.trst;



Optimize (paramValues, theLnLik);

lnlikDelta = 2 (unconstrainedLnLik-paramValues[1][0]);

pValue = 1-CChi2 (lnlikDelta, 1);



fprintf (stdout, "\n\n3). Outgroup at taxon #3");

fprintf (stdout, "\n\t a). Transition rate test. P-Value = ", pValue,"\n", theLnLik);



ClearConstraints (threeTaxaTree);

threeTaxaTree.a.trvs := threeTaxaTree.b.trvs;



Optimize (paramValues, theLnLik);

	  lnlikDelta = 2 (unconstrainedLnLik-paramValues[1][0]);

pValue = 1-CChi2 (lnlikDelta, 1);



fprintf (stdout, "\n\t b). Transversion rate test. P-Value = ", pValue,"\n", theLnLik);



threeTaxaTree.a.trst := threeTaxaTree.b.trst;



Optimize (paramValues, theLnLik);

lnlikDelta = 2 (unconstrainedLnLik-paramValues[1][0]);

pValue = 1-CChi2 (lnlikDelta, 2);



fprintf (stdout, "\n\t c). Both rates test. P-Value = ", pValue,"\n", theLnLik);