File: funcs.h

package info (click to toggle)
libpsortb 1.0%2Bdfsg-7
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,884 kB
  • sloc: ansic: 16,253; cpp: 515; makefile: 46
file content (292 lines) | stat: -rw-r--r-- 18,297 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292



/* function declarations */

/*
 * readhmm
 *
 * Prototype for the readhmm entry point.  It receives a stdio stream, a
 * pointer to a struct hmm_multi_s and a char *path, and returns int.  The
 * meaning of the return value is defined by the implementation and is not
 * visible in this header.
 */
int readhmm(FILE *, struct hmm_multi_s *, char *path);

/*
 * readhmm_multialpha
 *
 * Prototypes grouped under the readhmm_multialpha label.  All stream
 * arguments are stdio FILE pointers; the two int-returning functions do not
 * document their return codes here (see the definitions).
 */
int readhmm_multialpha(FILE *, struct hmm_multi_s *);

void transform_singlehmmfile_to_multi(FILE *hmmfile,
    FILE *outfile);

int readhmm_check(FILE *hmmfile);

void copy_hmm_struct(struct hmm_multi_s *hmm,
    struct hmm_multi_s *retrain_hmm);

/*
 * readseqs
 *
 * Sequence-reading prototypes.  Every function returns void and takes at
 * least one FILE pointer; results are delivered through the struct pointers
 * passed in.  Parameters left unnamed in the original stay unnamed here
 * because their roles cannot be determined from this header alone.
 */
void get_sequences_std(FILE *, struct sequences_s *, struct hmm_s *);
void get_labeled_sequences_std(FILE *, struct sequences_s *, struct hmm_s *);
void get_sequences_fasta(FILE *, struct sequences_s *);
void get_sequences_msa_std(FILE *, FILE *,
    struct msa_sequences_s *, struct hmm_s *,
    int, struct replacement_letter_s *);
void get_sequences_msa_prf(FILE *seqfile, FILE *priorfile,
    struct msa_sequences_s *msa_seq_infop,
    struct hmm_s *hmmp, int lead_seq);


/*
 * readseqs_multi
 *
 * Sequence-reading prototypes that operate on the *_multi_s structures.
 * Only seqfile_has_labels returns a value (int); its encoding is not
 * visible in this header.
 */
int seqfile_has_labels(FILE *seqfile);

void get_sequence_fasta_multi(char *seq,
    struct sequences_multi_s *seq_infop, int seq_nr);

void get_sequences_std_multi(FILE *seqfile,
    struct sequences_multi_s *seq_infop,
    struct hmm_multi_s *hmmp, int seq_nr);

void get_sequences_msa_std_multi(FILE *, FILE *,
    struct msa_sequences_multi_s *, struct hmm_multi_s *,
    int, struct replacement_letter_multi_s *);

void get_sequences_msa_prf_multi(FILE *seqfile, FILE *priorfile,
    struct msa_sequences_multi_s *msa_seq_infop,
    struct hmm_multi_s *hmmp);

/*
 * savehmm
 *
 * Prototypes for the two save routines.  Both take a FILE pointer and a
 * struct hmm_multi_s pointer and return int; the return-code convention is
 * defined by the implementation.
 */
int savehmm(FILE *, struct hmm_multi_s *);
int savehmm_multialpha(FILE *, struct hmm_multi_s *);


/*
 * core_algorithms
 *
 * Prototypes for forward, backward, viterbi and one_best, followed by their
 * msa_ counterparts that take a struct msa_sequences_s instead of a
 * struct letter_s sequence.  Every function returns int.
 *
 * Note that forward/one_best (and their msa_ forms) take a double ** where
 * backward/msa_backward take a double *.  The roles of the unnamed int and
 * double * arguments are not visible in this header; consult the
 * definitions before calling.
 */
int forward(struct hmm_s *, struct letter_s *,
    struct forward_s **, double **, int);
int backward(struct hmm_s *, struct letter_s *,
    struct backward_s **, double *, int);
int viterbi(struct hmm_s *, struct letter_s *,
    struct viterbi_s **, int);
int one_best(struct hmm_s *, struct letter_s *,
    struct one_best_s **, double **, int, char *);

int msa_forward(struct hmm_s *, struct msa_sequences_s *,
    int, int, int,
    struct forward_s **, double **,
    int, int, int, double *);
int msa_backward(struct hmm_s *, struct msa_sequences_s *,
    int, int,
    struct backward_s **, double *,
    int, int, int, double *);
int msa_viterbi(struct hmm_s *, struct msa_sequences_s *,
    int, int, int,
    struct viterbi_s **,
    int, int, int, double *);
int msa_one_best(struct hmm_s *, struct msa_sequences_s *,
    int, int, int,
    struct one_best_s **, double **, int, char *,
    int, int, double *);


/*
 * core_algorithms_multialpha
 *
 * *_multi variants of the core algorithm prototypes.  The non-msa functions
 * take four struct letter_s pointers after the HMM; the msa_ functions take
 * a struct msa_sequences_multi_s and end with four double * arguments.
 * Every function returns int.  Argument roles are not named in this header;
 * consult the definitions.
 */
int forward_multi(struct hmm_multi_s *,
    struct letter_s *, struct letter_s *, struct letter_s *, struct letter_s *,
    struct forward_s **, double **, int, int);
int backward_multi(struct hmm_multi_s *,
    struct letter_s *, struct letter_s *, struct letter_s *, struct letter_s *,
    struct backward_s **, double *, int, int);
int viterbi_multi(struct hmm_multi_s *,
    struct letter_s *, struct letter_s *, struct letter_s *, struct letter_s *,
    struct viterbi_s **, int, int);
int one_best_multi(struct hmm_multi_s *,
    struct letter_s *, struct letter_s *, struct letter_s *, struct letter_s *,
    struct one_best_s **, double **, int, char *, int);

int msa_forward_multi(struct hmm_multi_s *, struct msa_sequences_multi_s *,
    int, int, int,
    struct forward_s **, double **,
    int, int, int, int,
    double *, double *, double *, double *);
int msa_backward_multi(struct hmm_multi_s *, struct msa_sequences_multi_s *,
    int, int,
    struct backward_s **, double *,
    int, int, int, int,
    double *, double *, double *, double *);
int msa_viterbi_multi(struct hmm_multi_s *, struct msa_sequences_multi_s *,
    int, int, int,
    struct viterbi_s **,
    int, int, int, int,
    double *, double *, double *, double *);
int msa_one_best_multi(struct hmm_multi_s *, struct msa_sequences_multi_s *,
    int, int, int,
    struct one_best_s **, double **, int, char *,
    int, int, int,
    double *, double *, double *, double *);

/*
 * tm_core_algorithms
 *
 * Single prototype: tm_viterbi has the same leading arguments as viterbi
 * plus a struct aa_distrib_mtx_s pointer before the trailing int.  Returns
 * int; the return convention is defined by the implementation.
 */
int tm_viterbi(struct hmm_s *, struct letter_s *,
    struct viterbi_s **, struct aa_distrib_mtx_s *, int);


/*
 * training_algorithms
 *
 * Training prototypes for struct hmm_s models.  All return void.  The int
 * arguments are unnamed in this header; the msa_ variants take ten ints
 * followed by one double *.
 * NOTE(review): the named *_multi prototypes elsewhere in this header
 * presumably list the same flags in the same order -- confirm against the
 * definitions before relying on that.
 */
void baum_welch_std(struct hmm_s *, struct sequence_s *,
    int, int, int);
void baum_welch_dirichlet(struct hmm_s *, struct sequence_s *,
    int, int, int, int, int);
void extended_baum_welch_dirichlet(struct hmm_s *, struct sequence_s *,
    int, int, int, int, int);
void msa_baum_welch_dirichlet(struct hmm_s *, struct msa_sequences_s *,
    int, int, int, int, int,
    int, int, int, int, int,
    double *);
void extended_msa_baum_welch_dirichlet(struct hmm_s *, struct msa_sequences_s *,
    int, int, int, int, int,
    int, int, int, int, int,
    double *);

/*
 * training_algorithms (*_multi variants)
 *
 * The original label for this section is simply "training_algorithms",
 * duplicating the previous one; every function declared here operates on
 * struct hmm_multi_s.  All return void.
 */
void baum_welch_std_multi(struct hmm_multi_s *hmmp,
    struct sequence_multi_s *seqsp,
    int nr_seqs, int annealing, int use_labels,
    int multi_scoring_method, int use_prior);

void baum_welch_dirichlet_multi(struct hmm_multi_s *hmmp,
    struct sequence_multi_s *seqsp,
    int nr_seqs, int annealing, int use_labels,
    int use_transition_pseudo_counts, int use_emission_pseudo_counts,
    int multi_scoring_method, int use_prior);

void msa_baum_welch_dirichlet_multi(struct hmm_multi_s *hmmp,
    struct msa_sequences_multi_s *msa_seq_infop,
    int nr_seqs, int annealing,
    int use_gap_shares, int use_lead_columns, int use_labels,
    int use_transition_pseudo_counts, int use_emission_pseudo_counts,
    int normalize, int scoring_method, int use_nr_occ,
    int multi_scoring_method,
    double *aa_freqs, double *aa_freqs_2,
    double *aa_freqs_3, double *aa_freqs_4,
    int use_prior);

void extended_msa_baum_welch_dirichlet_multi(struct hmm_multi_s *hmmp,
    struct msa_sequences_multi_s *msa_seq_infop,
    int nr_seqs, int annealing,
    int use_gap_shares, int use_lead_columns, int use_labels,
    int use_transition_pseudo_counts, int use_emission_pseudo_counts,
    int normalize, int scoring_method, int use_nr_occ,
    int multi_scoring_method,
    double *aa_freqs, double *aa_freqs_2,
    double *aa_freqs_3, double *aa_freqs_4,
    int use_prior);



/*
 * std_funcs
 *
 * Allocation and matrix-initialisation helpers.
 * NOTE(review): malloc_or_die takes an int, presumably a byte count; a
 * size_t would be the conventional type, but the signature is kept as-is
 * so existing callers and the definition stay in sync.
 */
void *malloc_or_die(int);
void init_float_mtx(double *, double, int);
void init_viterbi_s_mtx(struct viterbi_s *, double, int);
/*
 * printhelp_* -- one help routine per program, none taking arguments and
 * none returning a value.
 *
 * Each is declared with an explicit (void) parameter list.  Before C23 an
 * empty list "()" declares a function with unspecified parameters, which
 * is not a prototype, so the compiler would silently accept calls that
 * pass stray arguments.  (void) is compatible with existing definitions
 * written as "void f() { ... }".
 */
void printhelp_modhmms(void);
void printhelp_modhmms_msa(void);
void printhelp_hmmtrain(void);
void printhelp_hmmtrain_msa(void);
void printhelp_modhmms_multialpha(void);
void printhelp_modhmms_msa_multialpha(void);
void printhelp_hmmtrain_multialpha(void);
void printhelp_hmmtrain_msa_multialpha(void);
void printhelp_modhmms_tm_multialpha(void);
void printhelp_modhmms_tm_msa_multialpha(void);
void printhelp_hmmtrain_tm_multialpha(void);
void printhelp_hmmtrain_tm_msa_multialpha(void);
void printhelp_modhmms_tm(void);
void printhelp_modhmms_tm_msa(void);
void printhelp_hmmtrain_tm(void);
void printhelp_hmmtrain_tm_msa(void);

void printhelp_chmmtrain(void);
void printhelp_chmmtrain_msa(void);
void printhelp_chmmtrain_multialpha(void);
void printhelp_chmmtrain_msa_multialpha(void);
void printhelp_add_alphabet(void);
void printhelp_add2profilehmm(void);
void printhelp_cal(void);
void printhelp_opt(void);

/*
 * Index, length and printing helpers.  All but print_seq return int; the
 * meaning of each unnamed argument and of the returned value is defined by
 * the implementation and is not visible in this header.
 */
int get_mtx_index(int, int, int);
int get_alphabet_index(struct letter_s *, char *, int);
int get_alphabet_index_msa_query(char *, char *, int);
int get_replacement_letter_index(struct letter_s *,
    struct replacement_letter_s *);
int get_replacement_letter_index_multi(struct letter_s *c,
    struct replacement_letter_multi_s *replacement_letters,
    int alphabet);
int get_alphabet_index_single(char *, char, int);
int get_replacement_letter_index_single(char *,
    struct replacement_letter_s *);
int get_seq_length(struct letter_s *);
int path_length(int, int, struct hmm_s *, int);
int path_length_multi(int, int, struct hmm_multi_s *, int);
void print_seq(struct letter_s *, FILE *, int, char *, int);
/*
 * Path lookup, table loading and string conversion helpers.
 * NOTE(review): this header does not say who owns the pointers returned by
 * get_end_path_start*, get_profile_vertex_type, letter_as_string and
 * sequence_as_string -- check the definitions before freeing or retaining
 * them.
 */
struct path_element *get_end_path_start(int l, struct hmm_s *hmmp);
struct path_element *get_end_path_start_multi(int l,
    struct hmm_multi_s *hmmp);
char *get_profile_vertex_type(int, int *);
void get_replacement_letters(FILE *, struct replacement_letter_s *);
void get_aa_distrib_mtx(FILE *distribmtxfile,
    struct aa_distrib_mtx_s *aa_distrib_mtxp);
void get_replacement_letters_multi(FILE *replfile,
    struct replacement_letter_multi_s *replacement_lettersp);
char *letter_as_string(struct letter_s *);
char *sequence_as_string(struct letter_s *);
/*
 * Viterbi path extraction (label and state-index forms, for struct hmm_s
 * and struct hmm_multi_s) and number-to-string helpers.  The path
 * functions write into the caller-supplied labels/path buffer and take an
 * int *ip; the required buffer size is not stated in this header.
 */
void get_viterbi_label_path(struct viterbi_s *cur, struct hmm_s *hmmp,
    struct viterbi_s *viterbi_mtxp,
    int row, int row_size, char *labels, int *ip);
void get_viterbi_label_path_multi(struct viterbi_s *cur, struct hmm_multi_s *hmmp,
    struct viterbi_s *viterbi_mtxp,
    int row, int row_size, char *labels, int *ip);
void get_viterbi_path(struct viterbi_s *cur, struct hmm_s *hmmp,
    struct viterbi_s *viterbi_mtxp,
    int row, int row_size, int *path, int *ip);
void get_viterbi_path_multi(struct viterbi_s *cur, struct hmm_multi_s *hmmp,
    struct viterbi_s *viterbi_mtxp,
    int row, int row_size, int *path, int *ip);

/*
 * NOTE(review): some C libraries ship a non-standard itoa with a different
 * signature; verify there is no clash on the target platforms.
 */
void itoa(char *s, int nr);
void ftoa(char *s, double nr, int prec);
/*
 * File readers for substitution matrices, priors and frequencies.  Each
 * takes a FILE pointer and returns int; the double ** arguments are
 * output pointers whose allocation policy is defined by the implementation
 * (not visible here).
 */
int read_subst_matrix(double **mtx, FILE *substmtxfile);
int read_subst_matrix_multi(double **mtxpp, double **mtxpp_2,
    double **mtxpp_3, double **mtxpp_4,
    FILE *substmtxfile);
int read_prior_file(struct emission_dirichlet_s *em_di,
    struct hmm_s *hmmp, FILE *priorfile);
int read_frequencies(FILE *freqfile, double **aa_freqs);
int read_frequencies_multi(FILE *freqfile,
    double **aa_freqsp, double **aa_freqsp_2,
    double **aa_freqsp_3, double **aa_freqsp_4);
int read_prior_file_multi(struct emission_dirichlet_s *em_di,
    struct hmm_multi_s *hmmp, FILE *priorfile, int alphabet);
int read_multi_prior_file_multi(struct emission_dirichlet_s *em_di,
    struct hmm_multi_s *hmmp, FILE *priorfile, int alphabet);
/*
 * State queries, reliability-score selection, clean-up routines and MSA
 * label readers.
 * NOTE(review): get_best_reliability_score takes three doubles but returns
 * int -- presumably an index or selector rather than the score itself;
 * confirm against the definition.
 */
int locked_state(struct hmm_s *hmmp, int v);
int locked_state_multi(struct hmm_multi_s *hmmp, int v);
int get_best_reliability_score(double reliability_score_1,
    double reliability_score_2,
    double reliability_score_3);

void hmm_garbage_collection(FILE *hmmfile, struct hmm_s *hmmp);
void hmm_garbage_collection_multi(FILE *hmmfile,
    struct hmm_multi_s *hmmp);
void hmm_garbage_collection_multi_no_dirichlet(FILE *hmmfile,
    struct hmm_multi_s *hmmp);
void msa_seq_garbage_collection_multi(struct msa_sequences_multi_s *msa_seq_info,
    int nr_alphabets);
void seq_garbage_collection_multi(struct sequences_multi_s *seq_info,
    int nr_alphabets);

void get_msa_labels(FILE *labelfile,
    struct msa_sequences_s *msa_seq_infop, struct hmm_s *hmmp);
void get_msa_labels_all_columns(FILE *labelfile,
    struct msa_sequences_s *msa_seq_infop, struct hmm_s *hmmp);
/*
 * Prior-share updates, replacement-letter handling, label readers for the
 * *_multi structures, label-set construction and sequence reversal.
 * Return-code conventions of the int-returning functions are defined by
 * their implementations.
 */
int update_shares_prior(struct emission_dirichlet_s *em_di,
    struct hmm_s *hmmp,
    struct msa_sequences_s *msa_seq_infop, int l);
int replacement_letter(struct letter_s *cur_letterp,
    struct replacement_letter_s *replacement_letters,
    struct msa_sequences_s *msa_seq_infop,
    struct hmm_s *hmmp, int seq_pos);

void get_labels_multi(FILE *labelfile,
    struct sequences_multi_s *seq_infop,
    struct hmm_multi_s *hmmp, int seq_nr);
void get_msa_labels_multi(FILE *labelfile,
    struct msa_sequences_multi_s *msa_seq_infop,
    struct hmm_multi_s *hmmp);
void get_msa_labels_all_columns_multi(FILE *labelfile,
    struct msa_sequences_multi_s *msa_seq_infop,
    struct hmm_multi_s *hmmp);

int update_shares_prior_multi(struct emission_dirichlet_s *em_di,
    struct hmm_multi_s *hmmp,
    struct msa_sequences_multi_s *msa_seq_infop,
    int l, int alphabet);
int replacement_letter_multi(struct letter_s *cur_letterp,
    struct replacement_letter_multi_s *replacement_letters,
    struct msa_sequences_multi_s *msa_seq_infop,
    struct hmm_multi_s *hmmp, int seq_pos, int alphabet);

int get_nr_alphabets(FILE *hmmfile);
void get_set_of_labels(struct hmm_s *hmmp);
void get_set_of_labels_multi(struct hmm_multi_s *hmmp);

void get_reverse_msa_seq_multi(struct msa_sequences_multi_s *msa_seq_infop,
    struct msa_sequences_multi_s *reverse_msa_seq_infop,
    struct hmm_multi_s *hmmp);
void get_reverse_seq_multi(struct sequence_multi_s *seqs,
    struct letter_s **reverse_seq_1, struct letter_s **reverse_seq_2,
    struct letter_s **reverse_seq_3, struct letter_s **reverse_seq_4,
    struct hmm_multi_s *hmmp, int seq_len);

/*
 * std calculation funcs
 *
 * add_to_E_continuous plus the family of get_*_statescore functions, each
 * of which returns a double.  Most variants share the leading argument
 * list (a_size, use_gap_shares, use_prior_shares, msa_seq, p, emissions,
 * vertex) and differ only in the trailing arguments (normalize,
 * gap_shares, aa_freqs, query_index, subst_mtx).
 */
void add_to_E_continuous(double *E, double Eka_base,
    struct msa_letter_s *msa_seq, int p,
    int k, int a_size, double *emissions);

double get_single_gaussian_statescore(double mu, double sigma_square,
    double letter);

double get_dp_statescore(int a_size, int use_gap_shares, int use_prior_shares,
    struct msa_letter_s *msa_seq, int p, double *emissions,
    int vertex, int normalize, double *gap_shares);
double get_dp_picasso_statescore(int a_size, int use_gap_shares, int use_prior_shares,
    struct msa_letter_s *msa_seq, int p, double *emissions,
    int vertex, int normalize, double *gap_shares, double *aa_freqs);
double get_sjolander_statescore(int a_size, int use_gap_shares, int use_prior_shares,
    struct msa_letter_s *msa_seq, int p, double *emissions,
    int vertex, int normalize, double *gap_shares);
double get_sjolander_reversed_statescore(int a_size, int use_gap_shares, int use_prior_shares,
    struct msa_letter_s *msa_seq, int p, double *emissions,
    int vertex, int normalize, double *gap_shares);
double get_picasso_statescore(int a_size, int use_gap_shares, int use_prior_shares,
    struct msa_letter_s *msa_seq, int p, double *emissions,
    int vertex, int normalize, double *gap_shares, double *aa_freqs);
double get_picasso_sym_statescore(int a_size, int use_gap_shares, int use_prior_shares,
    struct msa_letter_s *msa_seq, int p, double *emissions,
    int vertex, int normalize, double *gap_shares, double *aa_freqs);
double get_subst_mtx_product_statescore(int a_size, int use_gap_shares, int use_prior_shares,
    struct msa_letter_s *msa_seq, int p, double *emissions,
    int vertex, double *subst_mtx);
double get_subst_mtx_dot_product_statescore(int a_size, int use_gap_shares, int use_prior_shares,
    struct msa_letter_s *msa_seq, int p, double *emissions,
    int vertex, int normalize, double *gap_shares,
    int query_index, double *subst_mtx);
double get_subst_mtx_dot_product_prior_statescore(int a_size, int use_gap_shares, int use_prior_shares,
    struct msa_letter_s *msa_seq, int p, double *emissions,
    int vertex, int normalize, double *gap_shares,
    int query_index, double *subst_mtx);

/*
 * add_to_E_* family.  Every function returns void and takes the common
 * argument list (E, Eka_base, msa_seq, p, k, a_size, normalize); the
 * subst_mtx variants append a substitution matrix and, for the dot-product
 * forms, an alphabet string.  Results are presumably accumulated into E
 * (inferred from the function names -- confirm in the definitions).
 */
void add_to_E_dot_product(double *E, double Eka_base,
    struct msa_letter_s *msa_seq, int p,
    int k, int a_size, int normalize);
void add_to_E_dot_product_picasso(double *E, double Eka_base,
    struct msa_letter_s *msa_seq, int p,
    int k, int a_size, int normalize);
void add_to_E_picasso(double *E, double Eka_base,
    struct msa_letter_s *msa_seq, int p,
    int k, int a_size, int normalize);
void add_to_E_picasso_sym(double *E, double Eka_base,
    struct msa_letter_s *msa_seq, int p,
    int k, int a_size, int normalize);
void add_to_E_sjolander_score(double *E, double Eka_base,
    struct msa_letter_s *msa_seq, int p,
    int k, int a_size, int normalize);
void add_to_E_sjolander_reversed_score(double *E, double Eka_base,
    struct msa_letter_s *msa_seq, int p,
    int k, int a_size, int normalize);
void add_to_E_subst_mtx_product(double *E, double Eka_base,
    struct msa_letter_s *msa_seq, int p,
    int k, int a_size, int normalize, double *subst_mtx);
void add_to_E_subst_mtx_dot_product(double *E, double Eka_base,
    struct msa_letter_s *msa_seq, int p,
    int k, int a_size, int normalize, double *subst_mtx, char *alphabet);
void add_to_E_subst_mtx_dot_product_prior(double *E, double Eka_base,
    struct msa_letter_s *msa_seq, int p,
    int k, int a_size, int normalize, double *subst_mtx, char *alphabet);

/*
 * add_to_E_*_nr_occ family.  Signatures mirror the add_to_E_* functions
 * without the _nr_occ suffix one-for-one; how the two families differ in
 * behaviour is defined by the implementations and not visible here.
 */
void add_to_E_dot_product_nr_occ(double *E, double Eka_base,
    struct msa_letter_s *msa_seq, int p,
    int k, int a_size, int normalize);
void add_to_E_dot_product_picasso_nr_occ(double *E, double Eka_base,
    struct msa_letter_s *msa_seq, int p,
    int k, int a_size, int normalize);
void add_to_E_picasso_nr_occ(double *E, double Eka_base,
    struct msa_letter_s *msa_seq, int p,
    int k, int a_size, int normalize);
void add_to_E_picasso_sym_nr_occ(double *E, double Eka_base,
    struct msa_letter_s *msa_seq, int p,
    int k, int a_size, int normalize);
void add_to_E_sjolander_score_nr_occ(double *E, double Eka_base,
    struct msa_letter_s *msa_seq, int p,
    int k, int a_size, int normalize);
void add_to_E_sjolander_reversed_score_nr_occ(double *E, double Eka_base,
    struct msa_letter_s *msa_seq, int p,
    int k, int a_size, int normalize);
void add_to_E_subst_mtx_product_nr_occ(double *E, double Eka_base,
    struct msa_letter_s *msa_seq, int p,
    int k, int a_size, int normalize, double *subst_mtx);
void add_to_E_subst_mtx_dot_product_nr_occ(double *E, double Eka_base,
    struct msa_letter_s *msa_seq, int p,
    int k, int a_size, int normalize, double *subst_mtx, char *alphabet);
void add_to_E_subst_mtx_dot_product_prior_nr_occ(double *E, double Eka_base,
    struct msa_letter_s *msa_seq, int p,
    int k, int a_size, int normalize, double *subst_mtx, char *alphabet);

/*
 * Helpers operating on rows of struct one_best_s.  Both return void; nr_v
 * is passed to each alongside the row pointer.
 */
void update_labelings(struct one_best_s *cur_rowp,
    char *vertex_labels, int *sorted_v_list,
    int seq_len, int c,
    char *labels, int nr_of_labels, int nr_v);
void deallocate_row_labelings(struct one_best_s *prev_rowp,
    int nr_v);


/*
 * debug_funcs
 *
 * Diagnostic dump routines; all return void.  Where the destination of the
 * output is not a parameter it is chosen by the implementation (not
 * visible in this header).
 */
void dump_trans_matrix(int, int, double *);
void dump_emiss_matrix(int, int, double *);
void dump_forward_matrix(int, int, struct forward_s *);
void dump_backward_matrix(int, int, struct backward_s *);
void dump_one_best_matrix(int, int, struct one_best_s *);
void dump_scaling_array(int, double *);
void dump_from_trans_array(int, struct path_element **);
void dump_to_trans_array(int, struct path_element **);
void dump_viterbi_path(struct viterbi_s *, struct hmm_s *,
    struct viterbi_s *, int, int);
void dump_viterbi_label_path(struct viterbi_s *, struct hmm_s *,
    struct viterbi_s *, int, int);
void dump_T_matrix(int, int, double *);
void dump_E_matrix(int, int, double *);
void dump_distrib_groups(int *, int);
void dump_trans_tie_groups(struct transition_s *, int);
void dump_prior_struct(struct emission_dirichlet_s *);
void dump_silent_vertices_multi(struct hmm_multi_s *hmmp);
void dump_msa_seqs(struct msa_sequences_s *, int);
/*
 * NOTE(review): unlike the other *_multi functions in this header, this one
 * takes struct msa_sequences_s rather than struct msa_sequences_multi_s.
 * Possibly a typo -- check against the definition before changing it.
 */
void dump_msa_seqs_multi(struct msa_sequences_s *, struct hmm_multi_s *);
void dump_to_silent_trans_array(int, int **);
void dump_v_list(int *);
void dump_labeling(char *, int);
void dump_label_tmp_list(int *list);
void check_for_corrupt_values(int nr_rows, int nr_cols,
    double *mtx, char *name);