File: phylotesting.h

package info (click to toggle)
iqtree 1.5.3%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 9,780 kB
  • ctags: 11,529
  • sloc: cpp: 96,162; ansic: 59,874; python: 242; sh: 189; makefile: 45
file content (147 lines) | stat: -rw-r--r-- 5,005 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
/*
 * phylotesting.h
 *
 *  Created on: Aug 23, 2013
 *      Author: minh
 */

#ifndef PHYLOTESTING_H_
#define PHYLOTESTING_H_

#include "tools.h"

class PhyloTree;
class IQTree;


struct ModelInfo {
	string set_name; // subset name
	string name; // model name
	double logl; // tree log likelihood
	int df;      // #parameters
    double tree_len; // tree length, added 2015-06-24 for rcluster algorithm
    string tree; // added 2015-04-28: tree string
	double AIC_score, AICc_score, BIC_score;    // scores
	double AIC_weight, AICc_weight, BIC_weight; // weights
	bool AIC_conf, AICc_conf, BIC_conf;         // in confidence set?
};


struct TreeInfo {
	double logl; // log likelihood
	double se; // standard error of deltaL (logl difference to max), or square root of variance
	double rell_bp; // bootstrap proportion by RELL method
	bool rell_confident; // confidence set for RELL-BP
	double sh_pvalue; // p-value by Shimodaira-Hasegawa test
	double wsh_pvalue; // p-value by weighted Shimodaira-Hasegawa test
	double kh_pvalue; // p-value by Kishino-Hasegawa test
	double wkh_pvalue; // p-value by weighted Kishino-Hasegawa test
	double elw_value; // ELW - expected likelihood weights test
	bool elw_confident; // to represent confidence set of ELW test
    double au_pvalue; // p-value by approximately unbiased (AU) test
};


/**
 * computing AIC, AICc, and BIC scores
 */
void computeInformationScores(double tree_lh, int df, int ssize, double &AIC, double &AICc, double &BIC);

/**
 * check if the model file contains correct information
 * @param model_file model file names
 * @param model_name (OUT) vector of model names
 * @param lh_scores (OUT) vector of tree log-likelihoods
 * @param df_vec (OUT) vector of degrees of freedom (or K)
 * @return TRUE if success, FALSE failed.
 */

bool checkModelFile(string model_file, bool is_partitioned, vector<ModelInfo> &infos);

/**
 testing the best-fit model
 return in params.freq_type and params.rate_type
 @param set_name for partitioned analysis
 @param in_tree phylogenetic tree
 @param model_info (IN/OUT) information for all models considered
 @param set_name for partition model selection
 @param print_mem_usage true to print RAM memory used (default: false) 
 @return name of best-fit-model
 */
string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_info, ostream &fmodel,
		ModelsBlock *models_block, int num_threads, string set_name = "", bool print_mem_usage = false);

/**
 * print site log likelihoods to a fileExists
 * @param filename output file name
 * @param tree phylogenetic tree
 * @param ptn_lh pattern log-likelihoods, will be computed if NULL
 * @param append TRUE to append to existing file, FALSE otherwise
 * @param linename name of the line, default "Site_Lh" if NULL
 */
void printSiteLh(const char*filename, PhyloTree *tree, double *ptn_lh = NULL,
		bool append = false, const char *linename = NULL);

/**
 * print partition log likelihoods to a file
 * @param filename output file name
 * @param tree phylogenetic tree
 * @param ptn_lh pattern log-likelihoods, will be computed if NULL
 * @param append TRUE to append to existing file, FALSE otherwise
 * @param linename name of the line, default "Site_Lh" if NULL
 */
void printPartitionLh(const char*filename, PhyloTree *tree, double *ptn_lh = NULL,
		bool append = false, const char *linename = NULL);

/**
 * print site log likelihoods per category to a file
 * @param filename output file name
 * @param tree phylogenetic tree
 */
void printSiteLhCategory(const char*filename, PhyloTree *tree, SiteLoglType wsl);

/**
 * print site posterior probabilities per rate/mixture category to a file
 * @param filename output file name
 * @param tree phylogenetic tree
 */
void printSiteProbCategory(const char*filename, PhyloTree *tree, SiteLoglType wsl);

/**
 * print site state frequency vectors (for Huaichun)
 * @param filename output file name
 * @param tree phylogenetic tree
*/
void printSiteStateFreq(const char*filename, PhyloTree *tree, double *state_freqs = NULL);

/**
    print ancestral sequences
    @param filename output file name
    @param tree phylogenetic tree
    @param ast either AST_MARGINAL or AST_JOINT
*/
void printAncestralSequences(const char*filename, PhyloTree *tree, AncestralSeqType ast);

/**
 * Evaluate user-trees with possibility of tree topology tests
 * @param params program parameters
 * @param tree current tree
 * @param info (OUT) output information
 * @param distinct_ids IDs of distinct trees
 */
void evaluateTrees(Params &params, IQTree *tree, vector<TreeInfo> &info, IntVector &distinct_ids);

void evaluateTrees(Params &params, IQTree *tree);

/**
    get sequence type for a model name
    @param model_name model name string
    @param seq_type (OUT) sequence type, SEQ_UNKNOWN if is not determined
    @return 1 for parametric model, 2 for empirical model
*/
int getSeqType(const char *model_name, SeqType &seq_type);

string getSeqType(string model_name);


#endif /* PHYLOTESTING_H_ */