File: ML.bpp

package info (click to toggle)
bppsuite 2.4.1-8
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,116 kB
  • sloc: cpp: 3,867; sh: 65; makefile: 5
file content (172 lines) | stat: -rw-r--r-- 6,201 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# Example data set adapted from PAML
#
#logL = 65442.80353609310986939818
# NOTE(review): the value above is presumably the reference likelihood for this
# example; its sign convention is not stated here -- confirm.

# Global variables:
# DATA is referenced below as $(DATA) to build the input and output file names.
DATA = lysozymeLarge

# Sequences:

# The alphabet to use:
# DNA, RNA or Protein...
# Here a codon alphabet built on DNA letters is selected.
alphabet=Codon(letter=DNA)
# Genetic code to use with the codon alphabet.
genetic_code=Standard

# The sequence file to use (sequences must be aligned!)
input.sequence.file=../../../Data/$(DATA).fasta

# The alignment format:
input.sequence.format=Fasta

# Sites to use:
# all, nogap or complete (=only resolved chars)
input.sequence.sites_to_use = all
# Specify a maximum amount of gaps: may be an absolute number or a percentage.
input.sequence.max_gap_allowed = 50%
# Maximum amount of unresolved characters (presumably same syntax as above -- confirm).
input.sequence.max_unresolved_allowed = 100%

# Whether stop codons should be removed from the input sequences (yes or no).
input.sequence.remove_stop_codons = yes

# ----------------------------------------------------------------------------------------
#                                     Input tree file
# ----------------------------------------------------------------------------------------

# Initial tree: user or random
init.tree = user

# Tree file to read and its format.
# NOTE(review): presumably only used when init.tree = user -- confirm.
input.tree.file = ../../../Data/$(DATA).dnd
input.tree.format = Newick

# How initial branch lengths are set.
# NOTE(review): 'Input' presumably keeps the lengths found in the input tree -- confirm.
init.brlen.method = Input

# ----------------------------------------------------------------------------------------
#                                     Model specification
# ----------------------------------------------------------------------------------------
# See the manual for a description of the syntax and available options.
#
# Two models are declared and each one is assigned to a range of node ids.
nonhomogeneous=general
nonhomogeneous.number_of_models=2

model1 = YN98(kappa=1, omega=1, frequencies=F1X4)
model1.nodes_id = 0:20

# kappa of model2 is set to YN98.kappa_1 instead of a number.
# NOTE(review): presumably this aliases it to the kappa of model1 so that both
# models share a single kappa -- confirm against the manual.
model2 = YN98(kappa=YN98.kappa_1, omega=1, frequencies=F1X4)
model2.nodes_id = 21:32

# The commented-out examples below use the nonhomogeneous_one_per_branch prefix,
# which is not the mode selected above (nonhomogeneous=general).
# These lines are for the F1X4 option:
#nonhomogeneous_one_per_branch.shared_parameters=YN98.kappa,\
#  YN98.freq_Codon.123_Full.theta, YN98.freq_Codon.123_Full.theta1, YN98.freq_Codon.123_Full.theta2
# These lines are for the F3X4 option:
#nonhomogeneous_one_per_branch.shared_parameters=YN98.kappa, YN98.123_*
nonhomogeneous.stationarity=yes
# Only if stationarity is set to no (left empty here, since stationarity is yes):
nonhomogeneous.root_freq=

# NOTE(review): the trailing '//Gamma(n=4, alpha=0.358)' is presumably an inline
# comment holding an alternative rate distribution -- confirm that the parser
# strips '//' comments.
rate_distribution = Constant() //Gamma(n=4, alpha=0.358)

# Likelihood recursion option:
# - simple: derivatives take more time to compute, but likelihood computation is faster.
#   For big data sets, it can save a lot of memory usage too, particularly when the data are compressed.
# - double: uses more memory and needs more time to compute the likelihood, due to the double recursion.
#   Analytical derivatives are however faster to compute.
# This option has no effect in the following cases:
# - Topology estimation: this requires a double recursive algorithm,
# - Optimization with a molecular clock: a simple recursion with data compression is used in this case,
#   due to the impossibility of computing analytical derivatives.
likelihood.recursion = simple

# Site compression for the simple recursion:
# - simple: identical sites are not computed twice
# - recursive: looks for site patterns to save computation time during optimization, but
#   requires extra time for building the patterns.
#   This is usually the best option, particularly for nucleotide data sets.
likelihood.recursion_simple.compression = recursive

# ----------------------------------------------------------------------------------------
#                                     Optimization
# ----------------------------------------------------------------------------------------

# Should we reestimate likelihood parameters? Tree topology will not be optimized.
# (recommended)
optimization = FullD(derivatives=Newton)

# Tell if the parameters should be transformed in order to remove constraints.
# This can improve the optimization, but might be a bit slower.
optimization.reparametrization = no

# Final optimization step, may be useful if numerical derivatives are used:
# powell or simplex or none.
optimization.final = none

# Set the quantity of output to the screen:
optimization.verbose = 3

# Parameters to ignore (for instance equilibrium frequencies); empty here.
optimization.ignore_parameters = 

# Maximum number of likelihood evaluations:
optimization.max_number_f_eval = 10000

# Precision to reach:
optimization.tolerance = 0.000001

# A file where to write error or warning messages
# (a file path or std for standard output, as for the profiler below):
optimization.message_handler = $(DATA).messages

# A file where to dump optimization steps (a file path or std for standard output)
optimization.profiler = $(DATA).profile

# Shall we optimize tree topology as well?
# NOTE(review): set to no here, so the optimization.topology.* options below are
# presumably ignored -- confirm.
optimization.topology = no

# Algorithm to use for topology estimation: only NNI for now
optimization.topology.algorithm = NNI

# NNI method: fast, better or phyml
# You should use the phyml option, since it is really more efficient!
optimization.topology.algorithm_nni.method = phyml

# Number of phyml topology movement steps before reoptimizing parameters:
optimization.topology.nstep = 4

# Shall we estimate parameters before looking for topology movements?
optimization.topology.numfirst = no

# Tolerances: These numbers should not be too low, in order to save computation
# time and also for a better topology estimation.
# The optimization.tolerance parameter will be used for the final optimization
# of numerical parameters.
#
# Tolerance for the prior-topology estimation
optimization.topology.tolerance.before = 100

# Tolerance for the during-topology estimation
optimization.topology.tolerance.during = 100

# Shall we first scale the tree before optimizing parameters? [deprecated]
optimization.scale_first = no



# Output files:
# Should we write the resulting tree? none or file name.
output.tree.file = $(DATA).ML.dnd
output.tree.format = Newick

# Alignment information log file (site specific rates, etc):
output.infos = $(DATA).infos

# Write numerical parameter estimated values:
output.estimates = $(DATA).params.txt

# ----------------------------------------------------------------------------------------
#                                     Bootstrap
# ----------------------------------------------------------------------------------------

# Number of bootstrap replicates.
# NOTE(review): 0 presumably disables bootstrapping, making the options below
# irrelevant -- confirm.
bootstrap.number = 0
# Tell if numerical parameters should be kept to their initial value when bootstrapping:
bootstrap.approximate = no
# Set this to yes for detailed output when bootstrapping.
bootstrap.verbose = no
# NOTE(review): presumably the file where bootstrap trees are written (inferred
# from the key name) -- confirm.
bootstrap.output.file = $(DATA).ML_bstrees.dnd