/* function declarations */
/*
 * readhmm
 * Takes a stream, a multi-alphabet HMM structure and a path string and
 * returns an int (presumably a status code -- confirm in the implementation).
 */
int readhmm(FILE *, struct hmm_multi_s *, char *path);
/*
 * readhmm_multialpha
 * Prototypes operating on HMM file streams and hmm_multi_s structures.
 * Judging by the names they read, check, convert and copy model
 * descriptions -- confirm the exact contracts in the implementation.
 */
int readhmm_multialpha(FILE *, struct hmm_multi_s *);
void transform_singlehmmfile_to_multi(
    FILE *hmmfile,
    FILE *outfile);
int readhmm_check(FILE *hmmfile);
void copy_hmm_struct(
    struct hmm_multi_s *hmm,
    struct hmm_multi_s *retrain_hmm);
/*
 * readseqs
 * Sequence-input prototypes for the single-alphabet structures
 * (sequences_s / msa_sequences_s / hmm_s).  All return void; results are
 * presumably delivered through the struct pointers -- confirm.
 */
void get_sequences_std(FILE *, struct sequences_s *, struct hmm_s *);
void get_labeled_sequences_std(FILE *, struct sequences_s *, struct hmm_s *);
void get_sequences_fasta(FILE *, struct sequences_s *);
void get_sequences_msa_std(
    FILE *, FILE *, struct msa_sequences_s *, struct hmm_s *, int,
    struct replacement_letter_s *);
void get_sequences_msa_prf(
    FILE *seqfile, FILE *priorfile,
    struct msa_sequences_s *msa_seq_infop,
    struct hmm_s *hmmp, int lead_seq);
/*
 * readseqs_multi
 * Sequence-input prototypes for the multi-alphabet structures
 * (sequences_multi_s / msa_sequences_multi_s / hmm_multi_s).
 */
int seqfile_has_labels(FILE *seqfile);
void get_sequence_fasta_multi(
    char *seq,
    struct sequences_multi_s *seq_infop,
    int seq_nr);
void get_sequences_std_multi(
    FILE *seqfile,
    struct sequences_multi_s *seq_infop,
    struct hmm_multi_s *hmmp,
    int seq_nr);
void get_sequences_msa_std_multi(
    FILE *, FILE *, struct msa_sequences_multi_s *, struct hmm_multi_s *,
    int, struct replacement_letter_multi_s *);
void get_sequences_msa_prf_multi(
    FILE *seqfile, FILE *priorfile,
    struct msa_sequences_multi_s *msa_seq_infop,
    struct hmm_multi_s *hmmp);
/*
 * savehmm
 * Both prototypes take an output stream and a multi-alphabet HMM structure
 * and return an int (meaning of the value not visible here -- confirm).
 */
int savehmm(FILE *, struct hmm_multi_s *);
int savehmm_multialpha(FILE *, struct hmm_multi_s *);
/*
 * core_algorithms
 * Single-alphabet forward / backward / viterbi / one-best prototypes, first
 * for a plain letter sequence (struct letter_s *) and then the msa_ variants
 * for a struct msa_sequences_s.  The parameters are unnamed in this header;
 * see the definitions for the meaning of the int and double* arguments.
 */
int forward(
    struct hmm_s *, struct letter_s *, struct forward_s **, double **, int);
int backward(
    struct hmm_s *, struct letter_s *, struct backward_s **, double *, int);
int viterbi(
    struct hmm_s *, struct letter_s *, struct viterbi_s **, int);
int one_best(
    struct hmm_s *, struct letter_s *, struct one_best_s **, double **, int,
    char *);
int msa_forward(
    struct hmm_s *, struct msa_sequences_s *, int, int, int,
    struct forward_s **, double **, int, int, int, double *);
int msa_backward(
    struct hmm_s *, struct msa_sequences_s *, int, int,
    struct backward_s **, double *, int, int, int, double *);
int msa_viterbi(
    struct hmm_s *, struct msa_sequences_s *, int, int, int,
    struct viterbi_s **, int, int, int, double *);
int msa_one_best(
    struct hmm_s *, struct msa_sequences_s *, int, int, int,
    struct one_best_s **, double **, int, char *, int, int, double *);
/*
 * core_algorithms_multialpha
 * Multi-alphabet counterparts of the core algorithms.  The sequence variants
 * take four struct letter_s * arguments and the msa_ variants take four
 * trailing double * arguments (presumably one per alphabet -- confirm in the
 * definitions; the parameters are unnamed here).
 */
int forward_multi(
    struct hmm_multi_s *,
    struct letter_s *, struct letter_s *, struct letter_s *, struct letter_s *,
    struct forward_s **, double **, int, int);
int backward_multi(
    struct hmm_multi_s *,
    struct letter_s *, struct letter_s *, struct letter_s *, struct letter_s *,
    struct backward_s **, double *, int, int);
int viterbi_multi(
    struct hmm_multi_s *,
    struct letter_s *, struct letter_s *, struct letter_s *, struct letter_s *,
    struct viterbi_s **, int, int);
int one_best_multi(
    struct hmm_multi_s *,
    struct letter_s *, struct letter_s *, struct letter_s *, struct letter_s *,
    struct one_best_s **, double **, int, char *, int);
int msa_forward_multi(
    struct hmm_multi_s *, struct msa_sequences_multi_s *, int, int, int,
    struct forward_s **, double **, int, int, int, int,
    double *, double *, double *, double *);
int msa_backward_multi(
    struct hmm_multi_s *, struct msa_sequences_multi_s *, int, int,
    struct backward_s **, double *, int, int, int, int,
    double *, double *, double *, double *);
int msa_viterbi_multi(
    struct hmm_multi_s *, struct msa_sequences_multi_s *, int, int, int,
    struct viterbi_s **, int, int, int, int,
    double *, double *, double *, double *);
int msa_one_best_multi(
    struct hmm_multi_s *, struct msa_sequences_multi_s *, int, int, int,
    struct one_best_s **, double **, int, char *, int, int, int,
    double *, double *, double *, double *);
/*
 * tm_core_algorithms
 * Viterbi variant that additionally receives a struct aa_distrib_mtx_s *.
 */
int tm_viterbi(
    struct hmm_s *, struct letter_s *, struct viterbi_s **,
    struct aa_distrib_mtx_s *, int);
/*
 * training_algorithms
 * Single-alphabet training prototypes (struct hmm_s).  The first three work
 * on struct sequence_s, the msa_ variants on struct msa_sequences_s.  The int
 * options are unnamed here; see the definitions for their order and meaning.
 */
void baum_welch_std(struct hmm_s *, struct sequence_s *, int, int, int);
void baum_welch_dirichlet(
    struct hmm_s *, struct sequence_s *, int, int, int, int, int);
void extended_baum_welch_dirichlet(
    struct hmm_s *, struct sequence_s *, int, int, int, int, int);
void msa_baum_welch_dirichlet(
    struct hmm_s *, struct msa_sequences_s *,
    int, int, int, int, int, int, int, int, int, int,
    double *);
void extended_msa_baum_welch_dirichlet(
    struct hmm_s *, struct msa_sequences_s *,
    int, int, int, int, int, int, int, int, int, int,
    double *);
/*
 * training_algorithms (multi-alphabet variants)
 * Same family as the single-alphabet training prototypes, but operating on
 * struct hmm_multi_s with struct sequence_multi_s / msa_sequences_multi_s.
 */
void baum_welch_std_multi(
    struct hmm_multi_s *hmmp,
    struct sequence_multi_s *seqsp,
    int nr_seqs,
    int annealing,
    int use_labels,
    int multi_scoring_method,
    int use_prior);
void baum_welch_dirichlet_multi(
    struct hmm_multi_s *hmmp,
    struct sequence_multi_s *seqsp,
    int nr_seqs,
    int annealing,
    int use_labels,
    int use_transition_pseudo_counts,
    int use_emission_pseudo_counts,
    int multi_scoring_method,
    int use_prior);
void msa_baum_welch_dirichlet_multi(
    struct hmm_multi_s *hmmp,
    struct msa_sequences_multi_s *msa_seq_infop,
    int nr_seqs,
    int annealing,
    int use_gap_shares,
    int use_lead_columns,
    int use_labels,
    int use_transition_pseudo_counts,
    int use_emission_pseudo_counts,
    int normalize,
    int scoring_method,
    int use_nr_occ,
    int multi_scoring_method,
    double *aa_freqs,
    double *aa_freqs_2,
    double *aa_freqs_3,
    double *aa_freqs_4,
    int use_prior);
void extended_msa_baum_welch_dirichlet_multi(
    struct hmm_multi_s *hmmp,
    struct msa_sequences_multi_s *msa_seq_infop,
    int nr_seqs,
    int annealing,
    int use_gap_shares,
    int use_lead_columns,
    int use_labels,
    int use_transition_pseudo_counts,
    int use_emission_pseudo_counts,
    int normalize,
    int scoring_method,
    int use_nr_occ,
    int multi_scoring_method,
    double *aa_freqs,
    double *aa_freqs_2,
    double *aa_freqs_3,
    double *aa_freqs_4,
    int use_prior);
/*
 * std_funcs
 * Allocation and matrix-initialisation helpers.
 * NOTE(review): malloc_or_die takes its argument as int, not size_t; the
 * name suggests it terminates on allocation failure -- confirm.
 */
void *malloc_or_die(int);
void init_float_mtx(double *, double, int);
void init_viterbi_s_mtx(struct viterbi_s *, double, int);
/*
 * Help-text printers, one per program/variant (judging by the names).
 * Each takes no arguments and returns nothing.  They are declared with an
 * explicit (void) parameter list so that these are real prototypes: with
 * empty parentheses a C compiler (before C23) accepts calls with any number
 * of arguments without a diagnostic.
 */
void printhelp_modhmms(void);
void printhelp_modhmms_msa(void);
void printhelp_hmmtrain(void);
void printhelp_hmmtrain_msa(void);
void printhelp_modhmms_multialpha(void);
void printhelp_modhmms_msa_multialpha(void);
void printhelp_hmmtrain_multialpha(void);
void printhelp_hmmtrain_msa_multialpha(void);
void printhelp_modhmms_tm_multialpha(void);
void printhelp_modhmms_tm_msa_multialpha(void);
void printhelp_hmmtrain_tm_multialpha(void);
void printhelp_hmmtrain_tm_msa_multialpha(void);
void printhelp_modhmms_tm(void);
void printhelp_modhmms_tm_msa(void);
void printhelp_hmmtrain_tm(void);
void printhelp_hmmtrain_tm_msa(void);
void printhelp_chmmtrain(void);
void printhelp_chmmtrain_msa(void);
void printhelp_chmmtrain_multialpha(void);
void printhelp_chmmtrain_msa_multialpha(void);
void printhelp_add_alphabet(void);
void printhelp_add2profilehmm(void);
void printhelp_cal(void);
void printhelp_opt(void);
/*
 * Index and length lookups.  All return int; the parameters are mostly
 * unnamed in this header, so consult the definitions for their meaning.
 */
int get_mtx_index(int, int, int);
int get_alphabet_index(struct letter_s *, char *, int);
int get_alphabet_index_msa_query(char *, char *, int);
int get_replacement_letter_index(
    struct letter_s *, struct replacement_letter_s *);
int get_replacement_letter_index_multi(
    struct letter_s *c,
    struct replacement_letter_multi_s *replacement_letters,
    int alphabet);
int get_alphabet_index_single(char *, char, int);
int get_replacement_letter_index_single(
    char *, struct replacement_letter_s *);
int get_seq_length(struct letter_s *);
int path_length(int, int, struct hmm_s *, int);
int path_length_multi(int, int, struct hmm_multi_s *, int);
/*
 * Sequence printing, path lookup, replacement-letter / distribution-matrix
 * input, string conversion and viterbi path extraction prototypes.
 * Ownership of the char * / struct path_element * return values is not
 * visible from this header -- check the definitions before freeing.
 */
void print_seq(struct letter_s *, FILE *, int, char *, int);
struct path_element *get_end_path_start(int l, struct hmm_s *hmmp);
struct path_element *get_end_path_start_multi(
    int l, struct hmm_multi_s *hmmp);
char *get_profile_vertex_type(int, int *);
void get_replacement_letters(FILE *, struct replacement_letter_s *);
void get_aa_distrib_mtx(
    FILE *distribmtxfile,
    struct aa_distrib_mtx_s *aa_distrib_mtxp);
void get_replacement_letters_multi(
    FILE *replfile,
    struct replacement_letter_multi_s *replacement_lettersp);
char *letter_as_string(struct letter_s *);
char *sequence_as_string(struct letter_s *);
void get_viterbi_label_path(
    struct viterbi_s *cur, struct hmm_s *hmmp,
    struct viterbi_s *viterbi_mtxp,
    int row, int row_size, char *labels, int *ip);
void get_viterbi_label_path_multi(
    struct viterbi_s *cur, struct hmm_multi_s *hmmp,
    struct viterbi_s *viterbi_mtxp,
    int row, int row_size, char *labels, int *ip);
void get_viterbi_path(
    struct viterbi_s *cur, struct hmm_s *hmmp,
    struct viterbi_s *viterbi_mtxp,
    int row, int row_size, int *path, int *ip);
void get_viterbi_path_multi(
    struct viterbi_s *cur, struct hmm_multi_s *hmmp,
    struct viterbi_s *viterbi_mtxp,
    int row, int row_size, int *path, int *ip);
/*
 * NOTE(review): some C runtimes ship a non-standard itoa() with a different
 * signature; this declaration may clash there -- verify on target platforms.
 */
void itoa(char *s, int nr);
void ftoa(char *s, double nr, int prec);
/*
 * File readers for substitution matrices, priors and frequencies, the
 * locked-state queries, reliability-score selection and the
 * "garbage collection" (presumably cleanup -- confirm) prototypes.
 * The _multi readers take four double ** outputs.
 */
int read_subst_matrix(double **mtx, FILE *substmtxfile);
int read_subst_matrix_multi(
    double **mtxpp, double **mtxpp_2, double **mtxpp_3, double **mtxpp_4,
    FILE *substmtxfile);
int read_prior_file(
    struct emission_dirichlet_s *em_di,
    struct hmm_s *hmmp,
    FILE *priorfile);
int read_frequencies(FILE *freqfile, double **aa_freqs);
int read_frequencies_multi(
    FILE *freqfile,
    double **aa_freqsp, double **aa_freqsp_2,
    double **aa_freqsp_3, double **aa_freqsp_4);
int read_prior_file_multi(
    struct emission_dirichlet_s *em_di,
    struct hmm_multi_s *hmmp,
    FILE *priorfile,
    int alphabet);
int read_multi_prior_file_multi(
    struct emission_dirichlet_s *em_di,
    struct hmm_multi_s *hmmp,
    FILE *priorfile,
    int alphabet);
int locked_state(struct hmm_s *hmmp, int v);
int locked_state_multi(struct hmm_multi_s *hmmp, int v);
int get_best_reliability_score(
    double reliability_score_1,
    double reliability_score_2,
    double reliability_score_3);
void hmm_garbage_collection(FILE *hmmfile, struct hmm_s *hmmp);
void hmm_garbage_collection_multi(
    FILE *hmmfile, struct hmm_multi_s *hmmp);
void hmm_garbage_collection_multi_no_dirichlet(
    FILE *hmmfile, struct hmm_multi_s *hmmp);
void msa_seq_garbage_collection_multi(
    struct msa_sequences_multi_s *msa_seq_info, int nr_alphabets);
void seq_garbage_collection_multi(
    struct sequences_multi_s *seq_info, int nr_alphabets);
/*
 * Label input, prior-share updates, replacement-letter handling, alphabet
 * count, label-set collection and sequence reversal prototypes, each in a
 * single-alphabet (hmm_s) and/or multi-alphabet (hmm_multi_s) flavour.
 */
void get_msa_labels(
    FILE *labelfile,
    struct msa_sequences_s *msa_seq_infop,
    struct hmm_s *hmmp);
void get_msa_labels_all_columns(
    FILE *labelfile,
    struct msa_sequences_s *msa_seq_infop,
    struct hmm_s *hmmp);
int update_shares_prior(
    struct emission_dirichlet_s *em_di,
    struct hmm_s *hmmp,
    struct msa_sequences_s *msa_seq_infop,
    int l);
int replacement_letter(
    struct letter_s *cur_letterp,
    struct replacement_letter_s *replacement_letters,
    struct msa_sequences_s *msa_seq_infop,
    struct hmm_s *hmmp,
    int seq_pos);
void get_labels_multi(
    FILE *labelfile,
    struct sequences_multi_s *seq_infop,
    struct hmm_multi_s *hmmp,
    int seq_nr);
void get_msa_labels_multi(
    FILE *labelfile,
    struct msa_sequences_multi_s *msa_seq_infop,
    struct hmm_multi_s *hmmp);
void get_msa_labels_all_columns_multi(
    FILE *labelfile,
    struct msa_sequences_multi_s *msa_seq_infop,
    struct hmm_multi_s *hmmp);
int update_shares_prior_multi(
    struct emission_dirichlet_s *em_di,
    struct hmm_multi_s *hmmp,
    struct msa_sequences_multi_s *msa_seq_infop,
    int l,
    int alphabet);
int replacement_letter_multi(
    struct letter_s *cur_letterp,
    struct replacement_letter_multi_s *replacement_letters,
    struct msa_sequences_multi_s *msa_seq_infop,
    struct hmm_multi_s *hmmp,
    int seq_pos,
    int alphabet);
int get_nr_alphabets(FILE *hmmfile);
void get_set_of_labels(struct hmm_s *hmmp);
void get_set_of_labels_multi(struct hmm_multi_s *hmmp);
void get_reverse_msa_seq_multi(
    struct msa_sequences_multi_s *msa_seq_infop,
    struct msa_sequences_multi_s *reverse_msa_seq_infop,
    struct hmm_multi_s *hmmp);
void get_reverse_seq_multi(
    struct sequence_multi_s *seqs,
    struct letter_s **reverse_seq_1,
    struct letter_s **reverse_seq_2,
    struct letter_s **reverse_seq_3,
    struct letter_s **reverse_seq_4,
    struct hmm_multi_s *hmmp,
    int seq_len);
/*
 * std calculation funcs
 * Per-state score prototypes.  Apart from the single-gaussian one they share
 * a common leading parameter list (a_size, use_gap_shares, use_prior_shares,
 * msa_seq, p, emissions, vertex) and differ in the trailing arguments.
 */
void add_to_E_continuous(
    double *E, double Eka_base, struct msa_letter_s *msa_seq,
    int p, int k, int a_size, double *emissions);
double get_single_gaussian_statescore(
    double mu, double sigma_square, double letter);
double get_dp_statescore(
    int a_size, int use_gap_shares, int use_prior_shares,
    struct msa_letter_s *msa_seq, int p, double *emissions, int vertex,
    int normalize, double *gap_shares);
double get_dp_picasso_statescore(
    int a_size, int use_gap_shares, int use_prior_shares,
    struct msa_letter_s *msa_seq, int p, double *emissions, int vertex,
    int normalize, double *gap_shares, double *aa_freqs);
double get_sjolander_statescore(
    int a_size, int use_gap_shares, int use_prior_shares,
    struct msa_letter_s *msa_seq, int p, double *emissions, int vertex,
    int normalize, double *gap_shares);
double get_sjolander_reversed_statescore(
    int a_size, int use_gap_shares, int use_prior_shares,
    struct msa_letter_s *msa_seq, int p, double *emissions, int vertex,
    int normalize, double *gap_shares);
double get_picasso_statescore(
    int a_size, int use_gap_shares, int use_prior_shares,
    struct msa_letter_s *msa_seq, int p, double *emissions, int vertex,
    int normalize, double *gap_shares, double *aa_freqs);
double get_picasso_sym_statescore(
    int a_size, int use_gap_shares, int use_prior_shares,
    struct msa_letter_s *msa_seq, int p, double *emissions, int vertex,
    int normalize, double *gap_shares, double *aa_freqs);
double get_subst_mtx_product_statescore(
    int a_size, int use_gap_shares, int use_prior_shares,
    struct msa_letter_s *msa_seq, int p, double *emissions, int vertex,
    double *subst_mtx);
double get_subst_mtx_dot_product_statescore(
    int a_size, int use_gap_shares, int use_prior_shares,
    struct msa_letter_s *msa_seq, int p, double *emissions, int vertex,
    int normalize, double *gap_shares, int query_index, double *subst_mtx);
double get_subst_mtx_dot_product_prior_statescore(
    int a_size, int use_gap_shares, int use_prior_shares,
    struct msa_letter_s *msa_seq, int p, double *emissions, int vertex,
    int normalize, double *gap_shares, int query_index, double *subst_mtx);
/*
 * add_to_E_* family.  Every prototype starts with
 * (E, Eka_base, msa_seq, p, k, a_size, normalize); the subst_mtx variants
 * append a double *subst_mtx and the dot_product ones a char *alphabet too.
 * Each scoring method is declared twice: plain and with an _nr_occ suffix.
 */
void add_to_E_dot_product(
    double *E, double Eka_base, struct msa_letter_s *msa_seq,
    int p, int k, int a_size, int normalize);
void add_to_E_dot_product_picasso(
    double *E, double Eka_base, struct msa_letter_s *msa_seq,
    int p, int k, int a_size, int normalize);
void add_to_E_picasso(
    double *E, double Eka_base, struct msa_letter_s *msa_seq,
    int p, int k, int a_size, int normalize);
void add_to_E_picasso_sym(
    double *E, double Eka_base, struct msa_letter_s *msa_seq,
    int p, int k, int a_size, int normalize);
void add_to_E_sjolander_score(
    double *E, double Eka_base, struct msa_letter_s *msa_seq,
    int p, int k, int a_size, int normalize);
void add_to_E_sjolander_reversed_score(
    double *E, double Eka_base, struct msa_letter_s *msa_seq,
    int p, int k, int a_size, int normalize);
void add_to_E_subst_mtx_product(
    double *E, double Eka_base, struct msa_letter_s *msa_seq,
    int p, int k, int a_size, int normalize, double *subst_mtx);
void add_to_E_subst_mtx_dot_product(
    double *E, double Eka_base, struct msa_letter_s *msa_seq,
    int p, int k, int a_size, int normalize, double *subst_mtx,
    char *alphabet);
void add_to_E_subst_mtx_dot_product_prior(
    double *E, double Eka_base, struct msa_letter_s *msa_seq,
    int p, int k, int a_size, int normalize, double *subst_mtx,
    char *alphabet);
void add_to_E_dot_product_nr_occ(
    double *E, double Eka_base, struct msa_letter_s *msa_seq,
    int p, int k, int a_size, int normalize);
void add_to_E_dot_product_picasso_nr_occ(
    double *E, double Eka_base, struct msa_letter_s *msa_seq,
    int p, int k, int a_size, int normalize);
void add_to_E_picasso_nr_occ(
    double *E, double Eka_base, struct msa_letter_s *msa_seq,
    int p, int k, int a_size, int normalize);
void add_to_E_picasso_sym_nr_occ(
    double *E, double Eka_base, struct msa_letter_s *msa_seq,
    int p, int k, int a_size, int normalize);
void add_to_E_sjolander_score_nr_occ(
    double *E, double Eka_base, struct msa_letter_s *msa_seq,
    int p, int k, int a_size, int normalize);
void add_to_E_sjolander_reversed_score_nr_occ(
    double *E, double Eka_base, struct msa_letter_s *msa_seq,
    int p, int k, int a_size, int normalize);
void add_to_E_subst_mtx_product_nr_occ(
    double *E, double Eka_base, struct msa_letter_s *msa_seq,
    int p, int k, int a_size, int normalize, double *subst_mtx);
void add_to_E_subst_mtx_dot_product_nr_occ(
    double *E, double Eka_base, struct msa_letter_s *msa_seq,
    int p, int k, int a_size, int normalize, double *subst_mtx,
    char *alphabet);
void add_to_E_subst_mtx_dot_product_prior_nr_occ(
    double *E, double Eka_base, struct msa_letter_s *msa_seq,
    int p, int k, int a_size, int normalize, double *subst_mtx,
    char *alphabet);
/*
 * Labeling maintenance for rows of struct one_best_s; both take the number
 * of vertices (nr_v) as their last argument.
 */
void update_labelings(
    struct one_best_s *cur_rowp,
    char *vertex_labels,
    int *sorted_v_list,
    int seq_len,
    int c,
    char *labels,
    int nr_of_labels,
    int nr_v);
void deallocate_row_labelings(
    struct one_best_s *prev_rowp,
    int nr_v);
/*
 * debug_funcs
 * dump_* diagnostics plus check_for_corrupt_values.  All return void;
 * where the output goes is not visible from this header.
 */
void dump_trans_matrix(int, int, double *);
void dump_emiss_matrix(int, int, double *);
void dump_forward_matrix(int, int, struct forward_s *);
void dump_backward_matrix(int, int, struct backward_s *);
void dump_one_best_matrix(int, int, struct one_best_s *);
void dump_scaling_array(int, double *);
void dump_from_trans_array(int, struct path_element **);
void dump_to_trans_array(int, struct path_element **);
void dump_viterbi_path(
    struct viterbi_s *, struct hmm_s *, struct viterbi_s *, int, int);
void dump_viterbi_label_path(
    struct viterbi_s *, struct hmm_s *, struct viterbi_s *, int, int);
void dump_T_matrix(int, int, double *);
void dump_E_matrix(int, int, double *);
void dump_distrib_groups(int *, int);
void dump_trans_tie_groups(struct transition_s *, int);
void dump_prior_struct(struct emission_dirichlet_s *);
void dump_silent_vertices_multi(struct hmm_multi_s *hmmp);
void dump_msa_seqs(struct msa_sequences_s *, int);
/*
 * NOTE(review): the _multi variant below takes struct msa_sequences_s, not
 * struct msa_sequences_multi_s as the other _multi prototypes do -- confirm
 * against the definition before changing.
 */
void dump_msa_seqs_multi(struct msa_sequences_s *, struct hmm_multi_s *);
void dump_to_silent_trans_array(int, int **);
void dump_v_list(int *);
void dump_labeling(char *, int);
void dump_label_tmp_list(int *list);
void check_for_corrupt_values(
    int nr_rows, int nr_cols, double *mtx, char *name);
|