File: ML.bpp

package info (click to toggle)
bppsuite 2.4.1-8
links: PTS, VCS
area: main
in suites: forky, sid
size: 2,116 kB
sloc: cpp: 3,867; sh: 65; makefile: 5
file content (172 lines) | stat: -rw-r--r-- 6,201 bytes
parent folder | download | duplicates (5)
#Example data set adapted from PAML
#
#logL = 65442.80353609310986939818

# Global variables:
DATA = lysozymeLarge

# Sequences:

# The alphabet to use:
# DNA, RNA or Protein...
alphabet=Codon(letter=DNA)
genetic_code=Standard

# The sequence file to use (sequences must be aligned!)
input.sequence.file=../../../Data/$(DATA).fasta

# The alignment format:
input.sequence.format=Fasta

#Sites to use:
# all, nogap or complete (=only resolved chars)
input.sequence.sites_to_use = all
# Specify a maximum amount of gaps: may be an absolute number or a percentage.
input.sequence.max_gap_allowed = 50%
input.sequence.max_unresolved_allowed = 100%

input.sequence.remove_stop_codons = yes

# ----------------------------------------------------------------------------------------
#                                     Input tree file
# ----------------------------------------------------------------------------------------

# user or random
init.tree = user

input.tree.file = ../../../Data/$(DATA).dnd
input.tree.format = Newick

init.brlen.method = Input

# ----------------------------------------------------------------------------------------
#                                     Model specification
# ----------------------------------------------------------------------------------------
# See the manual for a description of the syntax and available options.
#
nonhomogeneous=general
nonhomogeneous.number_of_models=2

model1 = YN98(kappa=1, omega=1, frequencies=F1X4)
model1.nodes_id = 0:20

model2 = YN98(kappa=YN98.kappa_1, omega=1, frequencies=F1X4)
model2.nodes_id = 21:32

#These lines are for the F1X4 option:
#nonhomogeneous_one_per_branch.shared_parameters=YN98.kappa,\
#  YN98.freq_Codon.123_Full.theta, YN98.freq_Codon.123_Full.theta1, YN98.freq_Codon.123_Full.theta2
#These lines are for the F3X4 option:
#nonhomogeneous_one_per_branch.shared_parameters=YN98.kappa, YN98.123_*
nonhomogeneous.stationarity=yes
#Only if stationarity is set to false:
nonhomogeneous.root_freq=

rate_distribution = Constant() //Gamma(n=4, alpha=0.358)

# Likelihood recursion option:
# - simple: derivatives takes more time to compute, but likelihood computation is faster.
#   For big data sets, it can save a lot of memory usage too, particularly when the data are compressed.
# - double: Uses more memory and need more time to compute likelihood, due to the double recursion.
#   Analytical derivatives are however faster to compute.
# This option has no effect in the following cases:
# - Topology estimation: this requires a double recursive algorithm,
# - Optimization with a molecular clock: a simple recursion with data compression is used in this case,
#   due to the impossibility of computing analytical derivatives.
likelihood.recursion = simple

# Site compression for the simple recursion:
# - simple: identical sites are not computed twice
# - recursive: look for site patterns to save computation time during optimization, but
#   requires extra time for building the patterns.
#   This is usually the best option, particularly for nucleotide data sets.
likelihood.recursion_simple.compression = recursive

# ----------------------------------------------------------------------------------------
#                                     Optimization
# ----------------------------------------------------------------------------------------

# Should we reestimate likelihood parameters? Tree topology will not be optimized.
# (recommanded)
optimization = FullD(derivatives=Newton)

# Tell if the parameter should be transformed in order to remove constraints.
# This can improves the optimization, but might be a bit slower.
optimization.reparametrization = no

# Final optimization step, may be useful if numerical derivatives are used:
# powell or simplex or none.
optimization.final = none

# Set the quantity of output to the screen:
optimization.verbose = 3

# Parameters to ignore (for instance equilibrium frequencies)
optimization.ignore_parameters = 

# Maximum number of likelihood evaluations:
optimization.max_number_f_eval = 10000

# Precision to reach:
optimization.tolerance = 0.000001

# idem for error or warning messages:
optimization.message_handler = $(DATA).messages

# A file where to dump optimization steps (a file path or std for standard output)
optimization.profiler = $(DATA).profile

# Shall we optimize tree topology as well?
optimization.topology = no

# Algorithm to use for topology estimation: only NNI for now
optimization.topology.algorithm = NNI

# NNI method: fast, better or phyml
# You should use the phyml option, since it is really more efficient!
optimization.topology.algorithm_nni.method = phyml

# Number of phyml topology movement steps before reoptimizing parameters:
optimization.topology.nstep = 4

# Shall we estimate parameters before looking for topology movements?
optimization.topology.numfirst = no

# Tolerances: These numbers should not be too low, in order to save computation
# time and also for a better topology estimation.
# The optimization.tolerance parameter will be used for the final optimization
# of numerical parameters.
#
# Tolerance for the prior-topology estimation
optimization.topology.tolerance.before = 100

# Tolerance for the during-topology estimation
optimization.topology.tolerance.during = 100

# Shall we first scale the tree before optimizing parameters? [deprecated]
optimization.scale_first = no




# Should we write the resulting tree? none or file name.
output.tree.file = $(DATA).ML.dnd
output.tree.format = Newick

# Alignment information log file (site specific rates, etc):
output.infos = $(DATA).infos

# Write numerical parameter estimated values:
output.estimates = $(DATA).params.txt

# ----------------------------------------------------------------------------------------
#                                     Bootstrap
# ----------------------------------------------------------------------------------------

bootstrap.number = 0
# Tell if numerical parameters should be kept to their initial value when bootstrapping: 
bootstrap.approximate = no
# Set this to yes for detailed output when bootstrapping. 
bootstrap.verbose = no
bootstrap.output.file = $(DATA).ML_bstrees.dnd