1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
|
#Example data set adapted from PAML
#
#logL = 65442.80353609310986939818
# Global variables:
# DATA is the base name of the data set. It is referenced as $(DATA) in the
# input paths and in every output file name further down in this file.
DATA = lysozymeLarge
# Sequences:
# The alphabet to use:
# DNA, RNA or Protein...
# Here a codon alphabet built on DNA letters is requested.
alphabet=Codon(letter=DNA)
# Genetic code to use with the codon alphabet:
genetic_code=Standard
# The sequence file to use (sequences must be aligned!)
input.sequence.file=../../../Data/$(DATA).fasta
# The alignment format:
input.sequence.format=Fasta
# Sites to use:
# all, nogap or complete (=only resolved chars)
input.sequence.sites_to_use = all
# Specify a maximum amount of gaps: may be an absolute number or a percentage.
input.sequence.max_gap_allowed = 50%
# Maximum amount of unresolved characters.
# NOTE(review): presumably accepts the same absolute/percentage syntax as
# max_gap_allowed -- confirm in the manual.
input.sequence.max_unresolved_allowed = 100%
# Whether stop codons are removed from the input sequences:
input.sequence.remove_stop_codons = yes
# ----------------------------------------------------------------------------------------
# Input tree file
# ----------------------------------------------------------------------------------------
# user or random
init.tree = user
# Tree file and its format (init.tree is set to user above):
input.tree.file = ../../../Data/$(DATA).dnd
input.tree.format = Newick
# NOTE(review): "Input" presumably means the branch lengths found in the tree
# file are used as starting values -- confirm in the manual.
init.brlen.method = Input
# ----------------------------------------------------------------------------------------
# Model specification
# ----------------------------------------------------------------------------------------
# See the manual for a description of the syntax and available options.
#
# Two models are declared (model1 and model2, matching number_of_models=2),
# each attached to its own set of node ids through modelN.nodes_id.
nonhomogeneous=general
nonhomogeneous.number_of_models=2
model1 = YN98(kappa=1, omega=1, frequencies=F1X4)
# NOTE(review): "0:20" is presumably an inclusive range of node ids -- confirm.
model1.nodes_id = 0:20
# kappa is not given a number here but refers to YN98.kappa_1.
# NOTE(review): presumably this aliases it to model1's kappa so that both
# models share one value, while omega stays model-specific -- confirm.
model2 = YN98(kappa=YN98.kappa_1, omega=1, frequencies=F1X4)
model2.nodes_id = 21:32
# NOTE(review): the commented-out lines below use the
# nonhomogeneous_one_per_branch prefix, whereas this file sets
# nonhomogeneous=general; they look like leftovers from another setup.
#These lines are for the F1X4 option:
#nonhomogeneous_one_per_branch.shared_parameters=YN98.kappa,\
# YN98.freq_Codon.123_Full.theta, YN98.freq_Codon.123_Full.theta1, YN98.freq_Codon.123_Full.theta2
#These lines are for the F3X4 option:
#nonhomogeneous_one_per_branch.shared_parameters=YN98.kappa, YN98.123_*
nonhomogeneous.stationarity=yes
# Only if stationarity is set to false (left empty here, since it is set to yes above):
nonhomogeneous.root_freq=
# Rate distribution across sites. The text after "//" is an alternative Gamma
# distribution left commented out.
# NOTE(review): this relies on the parser treating "//" as a comment marker -- confirm.
rate_distribution = Constant() //Gamma(n=4, alpha=0.358)
# Likelihood recursion option:
# - simple: derivatives take more time to compute, but likelihood computation is faster.
# For big data sets, it can save a lot of memory usage too, particularly when the data are compressed.
# - double: uses more memory and needs more time to compute the likelihood, due to the double recursion.
# Analytical derivatives are however faster to compute.
# This option has no effect in the following cases:
# - Topology estimation: this requires a double recursive algorithm,
# - Optimization with a molecular clock: a simple recursion with data compression is used in this case,
# due to the impossibility of computing analytical derivatives.
likelihood.recursion = simple
# Site compression for the simple recursion:
# - simple: identical sites are not computed twice
# - recursive: look for site patterns to save computation time during optimization, but
# requires extra time for building the patterns.
# This is usually the best option, particularly for nucleotide data sets.
likelihood.recursion_simple.compression = recursive
# ----------------------------------------------------------------------------------------
# Optimization
# ----------------------------------------------------------------------------------------
# Should we reestimate likelihood parameters? Tree topology will not be optimized.
# (recommended)
optimization = FullD(derivatives=Newton)
# Tell if the parameters should be transformed in order to remove constraints.
# This can improve the optimization, but might be a bit slower.
optimization.reparametrization = no
# Final optimization step, may be useful if numerical derivatives are used:
# powell or simplex or none.
optimization.final = none
# Set the quantity of output to the screen:
optimization.verbose = 3
# Parameters to ignore (for instance equilibrium frequencies); empty here.
optimization.ignore_parameters =
# Maximum number of likelihood evaluations:
optimization.max_number_f_eval = 10000
# Precision to reach:
optimization.tolerance = 0.000001
# A file where to write error or warning messages
# (same syntax as optimization.profiler below):
optimization.message_handler = $(DATA).messages
# A file where to dump optimization steps (a file path or std for standard output)
optimization.profiler = $(DATA).profile
# Shall we optimize tree topology as well?
optimization.topology = no
# NOTE(review): topology optimization is switched off above, so the
# optimization.topology.* settings below are presumably unused -- confirm.
# Algorithm to use for topology estimation: only NNI for now
optimization.topology.algorithm = NNI
# NNI method: fast, better or phyml
# You should use the phyml option, since it is really more efficient!
optimization.topology.algorithm_nni.method = phyml
# Number of phyml topology movement steps before reoptimizing parameters:
optimization.topology.nstep = 4
# Shall we estimate parameters before looking for topology movements?
optimization.topology.numfirst = no
# Tolerances: These numbers should not be too low, in order to save computation
# time and also for a better topology estimation.
# The optimization.tolerance parameter will be used for the final optimization
# of numerical parameters.
#
# Tolerance for the prior-topology estimation
optimization.topology.tolerance.before = 100
# Tolerance for the during-topology estimation
optimization.topology.tolerance.during = 100
# Shall we first scale the tree before optimizing parameters? [deprecated]
optimization.scale_first = no
# Should we write the resulting tree? none or file name.
output.tree.file = $(DATA).ML.dnd
# Format of the output tree file:
output.tree.format = Newick
# Alignment information log file (site specific rates, etc):
output.infos = $(DATA).infos
# Write numerical parameter estimated values:
output.estimates = $(DATA).params.txt
# ----------------------------------------------------------------------------------------
# Bootstrap
# ----------------------------------------------------------------------------------------
# Number of bootstrap replicates.
# NOTE(review): 0 presumably disables bootstrapping, in which case the
# remaining bootstrap.* settings are unused -- confirm.
bootstrap.number = 0
# Tell if numerical parameters should be kept to their initial value when bootstrapping:
bootstrap.approximate = no
# Set this to yes for detailed output when bootstrapping.
bootstrap.verbose = no
# File where the bootstrap trees are written:
bootstrap.output.file = $(DATA).ML_bstrees.dnd
|