File: HKY852blocks2models.bf

package info (click to toggle)
hyphy 2.5.69%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 26,728 kB
  • sloc: cpp: 81,964; xml: 467; lisp: 341; python: 166; javascript: 117; sh: 106; makefile: 87; ansic: 86
file content (146 lines) | stat: -rw-r--r-- 2,739 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
/* This is an example HY-PHY Batch File.



   It reads a '#' nucleotide dataset data/actin2.flt, parititions

   the data into two blocks - each third nucleotide position

   and the rest. We then apply HKY85 to the 1st partition and

   F81 to the 2nd.

      

   Output is printed out as a Newick Style tree with branch lengths

   representing the number of expected substitutions per branch (which

   is the default setting for nucleotide models w/o rate variation).

   Also, the likelihood ratio statistic is evaluated and the P-value

   for the test is reported.

   

   Sergei L. Kosakovsky Pond and Spencer V. Muse 

   December 1999. 

*/



/* 1. Read in the data and store the result in  DataSet variables.

	  The variable nuclotideSequence.sites holds the number of sites

	  in the data set. */



DataSet 		nucleotideSequence = ReadDataFile ("data/actin2.flt");

   

/* 2. Filter the data. 

	  "<001>" defines a comb of size 3, choosing the 3rd element of each triplet

	  "<110>" defines a comb of size 3, choosing the 1st and 2nd elements of each triplet */

	    

DataSetFilter	filteredData1 	= CreateFilter (nucleotideSequence,1,"<001>");

DataSetFilter	filteredData2 	= CreateFilter (nucleotideSequence,1,"<110>");



/* 3. Collect observed nucleotide frequencies from the filtered data. observedFreqs will

	  store the vector of frequencies. */



HarvestFrequencies (observedFreqs1, filteredData1, 1, 1, 1);

HarvestFrequencies (observedFreqs2, filteredData2, 1, 1, 1);



/* 4. Define the HKY85 substitution matrix. '*' is defined to be -(sum of off-diag row elements) */



HKY85RateMatrix = 

		{{*,trvs,trst,trvs}

		 {trvs,*,trvs,trst}

		 {trst,trvs,*,trvs}

		 {trvs,trst,trvs,*}};

		 		 

F81RateMatrix = 

		{{*,mu,mu,mu}

		 {mu,*,mu,mu}

		 {mu,mu,*,mu}

		 {mu,mu,mu,*}};



/*5.  Define the models.*/



Model 	HKY85 = (HKY85RateMatrix, observedFreqs1);

Model 	F81 =   (F81RateMatrix, observedFreqs2);



/*6.  Define 2 trees, one for each block. Even though the topology is the same,

	  the trees will have separate branch parameters for each partition.

*/



UseModel (HKY85);

Tree	actinTree1 = (ZMU60513,ZMU60514,((((((ZMU60511,ZMU60510),OSRAC2),(OSRAC1,SVSOAC1)),((OSRAC3,ZMU60507),ZMU60509)),(MZEACT1G,OSRAC7)),ZMU60508));

UseModel (F81);

Tree	actinTree2 = (ZMU60513,ZMU60514,((((((ZMU60511,ZMU60510),OSRAC2),(OSRAC1,SVSOAC1)),((OSRAC3,ZMU60507),ZMU60509)),(MZEACT1G,OSRAC7)),ZMU60508));



/*7. 	Set up the likelihood function, maximize, print results */



LikelihoodFunction  theLnLik = (filteredData1, actinTree1,  

                                filteredData2, actinTree2);



Optimize (paramValues, theLnLik);



fprintf  (stdout, "\n ----- RESULTS ----- \n", theLnLik);