File: Hypermutationglobalerrorrate.h

package info (click to toggle)
igor 1.4.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 4,116 kB
  • sloc: cpp: 12,453; python: 1,047; sh: 124; makefile: 33
file content (196 lines) | stat: -rw-r--r-- 6,703 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
/*
 * Hypermutationglobalerrorrate.h
 *
 *  Created on: Mar 8, 2016
 *      Author: Quentin Marcou
 *
 *  This source code is distributed as part of the IGoR software.
 *  IGoR (Inference and Generation of Repertoires) is a versatile software to analyze and model immune receptors
 *  generation, selection, mutation and all other processes.
 *   Copyright (C) 2017  Quentin Marcou
 *
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.

 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 */

#ifndef HYPERMUTATIONGLOBALERRORRATE_H_
#define HYPERMUTATIONGLOBALERRORRATE_H_

#include "Errorrate.h"
#include "Genechoice.h"
#include "Deletion.h"
#include <algorithm>
#include <array>
#include <math.h>
#include <gsl/gsl_linalg.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_blas.h>
#include <memory>

/**
 * \class Hypermutation_global_errorrate Hypermutationglobalerrorrate.h
 * \brief An additive (independent site) context dependent hypermutation model.
 * \author Q.Marcou
 * \version 1.0
 *
 * A specialization of the ErrorRate class.
 * Implements a context dependent hypermutation/error model with tunable context size.
 * Nucleotide from the context are assumed to contribute independently to the mutability of the context through an additive logarithmic score.
 * Such a model contains only 3N+1 parameters and allows to probe large context sizes.
 * The identity of the resulting nucleotide after mutation is assumed to follow a uniform distribution.
 */
class Hypermutation_global_errorrate: public Error_rate {

public:
	Hypermutation_global_errorrate(size_t,Gene_class,Gene_class,double);
	Hypermutation_global_errorrate(size_t,Gene_class,Gene_class,double,std::vector<double>);
	Hypermutation_global_errorrate(size_t,Gene_class,Gene_class,double,std::string);
	Hypermutation_global_errorrate(size_t,Gene_class,Gene_class,double,std::vector<double>,std::string);
	//Hypermutation_global_errorrate(size_t,Gene_class,Gene_class, ??); Constructor to read or copy the error rate
	virtual ~Hypermutation_global_errorrate();
	double compare_sequences_error_prob( double ,const std::string& , Seq_type_str_p_map& , const Seq_offsets_map& , const std::unordered_map<std::tuple<Event_type,Gene_class,Seq_side>, std::shared_ptr<Rec_Event>>&  , Mismatch_vectors_map& , double& , double& );
	void update();
	void initialize(const std::unordered_map<std::tuple<Event_type,Gene_class,Seq_side>, std::shared_ptr<Rec_Event>>&);
	void add_to_norm_counter();
	void clean_seq_counters();
	void clean_all_counters();
	void write2txt(std::ofstream&);
	void set_output_Nmer_stream(std::string);
	std::shared_ptr<Error_rate> copy()const;
	std::string type() const {return "HypermutationGlobalErrorRate";}
	Hypermutation_global_errorrate& operator+=(Hypermutation_global_errorrate);
	Error_rate* add_checked (Error_rate*);
	const double& get_err_rate_upper_bound(size_t,size_t) ;
	void build_upper_bound_matrix(size_t,size_t);
	int get_number_non_zero_likelihood_seqs() const{return number_seq;};
	std::queue<int>  generate_errors(std::string& , std::mt19937_64&) const;
	uint64_t generate_random_contributions(double);


private:
	void update_Nmers_proba(int,int,double);
	//void compute_P_SHM_and_BG();
	double compute_Nmer_unorm_score(int*,double*);
	double compute_new_model_likelihood(double,gsl_vector*);
	void increment_base_10_and_4(int& , int*);

	void introduce_uniform_transversion(char&, std::mt19937_64& , std::uniform_real_distribution<double>&) const;

	Gene_class learn_on;
	Gene_class apply_to;
	size_t mutation_Nmer_size;
	//std::unique_ptr<double[]> ei_nucleotide_contributions;
	double* ei_nucleotide_contributions;
	double mu;
	//std::map<int,double> Nmer_background_proba;
	double* Nmer_mutation_proba;

/*	double* Nmer_P_SHM;
	double* Nmer_P_BG;*/

	size_t alphabet_size = 4;

	//std::map<int,double> Nmer_background_proba_count;
	//std::map<int,double> Nmer_SHM_proba_count;


	//Normalized coverage and error counters
	//# V D and J possible realizations
	std::shared_ptr<Gene_choice> v_gene_event_p;
	size_t n_v_real;
	std::unordered_map<std::string , Event_realization> v_realizations;
	std::shared_ptr<Gene_choice> d_gene_event_p;
	size_t n_d_real;
	std::unordered_map<std::string , Event_realization> d_realizations;
	std::shared_ptr<Gene_choice> j_gene_event_p;
	size_t n_j_real;
	std::unordered_map<std::string , Event_realization> j_realizations;

	double* one_seq_Nmer_N_SHM;
	double* one_seq_Nmer_N_bg;
	double* Nmer_N_SHM;
	double* Nmer_N_bg;


	Int_Str* v_sequences;
	Int_Str* j_sequences;

	bool apply_to_v;
	bool apply_to_d;
	bool apply_to_j;

	bool learn_on_v;
	bool learn_on_d;
	bool learn_on_j;

	bool v_gene;
	bool d_gene;
	bool j_gene;
	bool vd_ins;
	bool dj_ins;
	bool vj_ins;

	const int** vgene_offset_p;
	const int** dgene_offset_p;
	const int** jgene_offset_p;

	const int** vgene_real_index_p;
	const int** dgene_real_index_p;
	const int** jgene_real_index_p;

	//Get deletion values
	//TODO need to change this in order to handle multiple models
	const int* v_3_del_value_p;
	const int* d_5_del_value_p;
	const int* d_3_del_value_p;
	const int* j_5_del_value_p;
	const int no_del_buffer = 0; //buffer used in case of no deletion event

	//Utility speed variables
	mutable int i;//iteration utility
	mutable int j;
	int v_3_del_value_corr;//Corrected value for deletion numbers to avoid taking into account negative deletions
	int d_5_del_value_corr;
	int d_3_del_value_corr;
	int j_5_del_value_corr;
	mutable double* tmp_cov_p;
	mutable double* tmp_err_p;
	int tmp_corr_len;
	int tmp_len_util;
	double scenario_new_proba;
	Int_Str scenario_resulting_sequence;

	std::vector<size_t> adressing_vector;
	mutable std::queue<size_t> current_Nmer;
	size_t largest_nuc_adress;
	mutable int tmp_int_nt;
	mutable int Nmer_index;
	std::vector<int>::const_iterator current_mismatch;
	bool is_visible_nt;

	std::vector<int> empty_vec_util;
	std::vector<int>* vec_ptr_util;

	double* debug_v_seq_coverage;
	double* debug_mismatch_seq_coverage;
	std::string debug_current_string;

	std::shared_ptr<std::ofstream> output_Nmer_stat_stream;
	bool output_Nmer_stat;



};

#endif /* HYPERMUTATIONGLOBALERRORRATE_H_ */