File: MLNH.bpp

package info (click to toggle)
bppsuite 2.4.1-8
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,116 kB
  • sloc: cpp: 3,867; sh: 65; makefile: 5
file content (139 lines) | stat: -rw-r--r-- 5,068 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#Initial log likelihood.................: -67066.6981049241
#Log likelihood.........................: -65187.3822910926
#Full.theta.............................: 0.481587
#Full.theta1............................: 0.568231
#Full.theta2............................: 0.591547
#GTR.a_1................................: 1.27372
#GTR.b_1................................: 0.667001
#GTR.c_1................................: 0.344267
#GTR.d_1................................: 0.339924
#GTR.e_1................................: 0.37557
#GTR.theta_1............................: 0.622987
#GTR.theta1_1...........................: 0.469133
#GTR.theta2_1...........................: 0.547965
#GTR.theta_2............................: 0.491549
#GTR.theta1_2...........................: 0.485613
#GTR.theta2_2...........................: 0.555095
#Gamma.alpha............................: 0.438647


# Global variables (referenced further down as $(DATA)):
DATA = LSU

# Sequences:

# Alphabet of the input sequences: DNA, RNA or Protein.
alphabet = DNA

# Path to the sequence file; the sequences must already be aligned.
input.sequence.file = ../../../Data/$(DATA).phy

# Format of the alignment file:
input.sequence.format = Phylip(order=sequential, type=extended)

# Which sites to use: all, nogap or complete (= only resolved characters).
input.sequence.sites_to_use = complete
# Maximum amount of gaps allowed: either an absolute number or a percentage.
input.sequence.max_gap_allowed = 100%

# ----------------------------------------------------------------------------------------
#                                     Input tree file
# ----------------------------------------------------------------------------------------

# Where the initial tree comes from: user or random.
init.tree = user

# Tree file (path built from the DATA variable) and its format.
input.tree.file = ../../../Data/$(DATA).dnd
input.tree.format = Newick

# Method used to initialise the branch lengths.
init.brlen.method = Input

# ----------------------------------------------------------------------------------------
#                                     Model specification
# ----------------------------------------------------------------------------------------

# Type of model:
#   no             => homogeneous case
#   one_per_branch => Galtier & Gouy 97 series of NH models
#   general        => the models are specified by hand (done below)
nonhomogeneous = general

# Root frequencies:
nonhomogeneous.root_freq = Full(init=balanced)

# Rate distribution:
rate_distribution = Gamma(n=4, alpha=0.358)

# General heterogeneous case: number of distinct models.
nonhomogeneous.number_of_models = 2

# Set up each model, followed by the ids of the nodes to which it should be assigned.
model1 = GTR(initFreqs=observed)
model1.nodes_id = 0:62

# The a-e arguments of model2 refer to the corresponding GTR entries of model1.
model2 = GTR(a=model1.GTR.a,\
             b=model1.GTR.b,\
             c=model1.GTR.c,\
             d=model1.GTR.d,\
             e=model1.GTR.e,\
             initFreqs=observed)
model2.nodes_id = 63:154

# Likelihood recursion option:
# - simple: derivatives take more time to compute, but likelihood computation is faster.
#   For big data sets, it can also save a lot of memory, particularly when the data are compressed.
# - double: uses more memory and needs more time to compute the likelihood, due to the double recursion.
#   Analytical derivatives are, however, faster to compute.
likelihood.recursion = simple

# Site compression for the simple recursion:
# - simple: identical sites are not computed twice.
# - recursive: looks for site patterns to save computation time during optimization, but
#   requires extra time for building the patterns.
#   This is usually the best option, particularly for nucleotide data sets.
likelihood.recursion_simple.compression = recursive

# ----------------------------------------------------------------------------------------
#                                     Optimization
# ----------------------------------------------------------------------------------------

# Optimization method and the kind of derivatives it uses.
optimization = FullD(derivatives=Newton)
optimization.reparametrization = no
# Verbosity level. This key must appear only once: a repeated assignment makes the
# effective value depend on how the parser resolves duplicates.
# NOTE(review): 3 is the value of the later of two former assignments (1, then 3),
# presumably the one that was in effect -- confirm.
optimization.verbose = 3
# Parameters to leave out of the optimization (empty: none listed).
optimization.ignore_parameters =
# Stopping criteria: maximum number of function evaluations and tolerance.
optimization.max_number_f_eval = 10000
optimization.tolerance = 0.000001
# Files for the message handler and the profiler, named after the DATA variable.
optimization.message_handler = $(DATA).messages
optimization.profiler = $(DATA).profile

# Topology optimization switch, followed by the optimization.topology.* settings.
optimization.topology = no
optimization.topology.nstep = 4
optimization.topology.numfirst = no
optimization.topology.tolerance.before = 100
optimization.topology.tolerance.during = 100
optimization.scale_first = no

# Where to write the resulting tree: a file name, or none to skip writing it.
output.tree.file = $(DATA).ML.dnd
# Format of the tree file written above.
output.tree.format = Newick

# Alignment information log file (site-specific rates, etc.):
output.infos = $(DATA).infos

# File receiving the estimated values of the numerical parameters:
output.estimates = $(DATA).params.txt

# ----------------------------------------------------------------------------------------
#                                     Bootstrap
# ----------------------------------------------------------------------------------------

# Number of bootstrap replicates (presumably 0 means no bootstrap is run -- confirm).
bootstrap.number = 0
# Whether numerical parameters should be kept at their initial values when bootstrapping:
bootstrap.approximate = no
# Set this to yes for detailed output when bootstrapping.
bootstrap.verbose = no
# File name for the bootstrap output, built from the DATA variable.
bootstrap.output.file = $(DATA).ML_bstrees.dnd