iVar
call_consensus_pileup.cpp
1 #include<iostream>
2 #include<fstream>
3 #include<sstream>
4 #include<vector>
5 #include<algorithm>
6 #include<string>
7 #include<regex>
8 
9 #include "allele_functions.h"
10 #include "call_consensus_pileup.h"
11 
12 const std::string gap="N";
13 
14 void format_alleles(std::vector<allele> &ad){
15  std::vector<allele>::iterator it = ad.begin();
16  int prev_ind = 0;
17  while(it < ad.end()){
18  if(it->nuc[0] == '-'){ // Remove deletions
19  it = ad.erase(it);
20  } // else if (it->nuc[0] == '+'){
21  // it->nuc[0] = it->prev_base;
22  // prev_ind = find_ref_in_allele(ad, it->prev_base);
23  // if(prev_ind!=-1)
24  // ad.at(prev_ind).depth -= it->depth;
25  // }
26  ++it;
27  }
28 }
29 
30 bool compare_allele_depth(const allele &a, const allele &b){
31  return b.depth < a.depth;
32 }
33 
34 ret_t get_consensus_allele(std::vector<allele> ad, uint8_t min_qual, double threshold){
35  ret_t t;
36  t.nuc="";
37  t.q = min_qual+33;
38  if(ad.size()==0)
39  return t;
40  format_alleles(ad);
41  if(ad.size() == 1){
42  t.nuc = (ad.at(0).nuc.compare("*") == 0) ? "" : ad.at(0).nuc;
43  t.q = (ad.at(0).nuc.compare("*") == 0) ? min_qual + 33 : ad.at(0).mean_qual + 33;
44  return t;
45  }
46  std::string cnuc = "";
47  std::vector<allele> nuc_pos;
48  allele tmp_a;
49  char n;
50  uint32_t max_l = 0, max_depth = 0, tmp_depth = 0, cur_depth = 0, total_max_depth = 0, gap_depth = 0, total_indel_depth = 0;
51  uint8_t ambg_n = 1, ctr = 0;
52  double q = 0, tq = 0, cur_threshold = 0;
53  std::vector<allele>::iterator it;
54  for(std::vector<allele>::iterator it = ad.begin(); it != ad.end(); ++it) {
55  if(it->nuc.length() > max_l){
56  max_l = it->nuc.length();
57  }
58  if(it->nuc.length() == 1){
59  total_max_depth += it->depth;
60  total_indel_depth += it->depth;
61  total_indel_depth = total_indel_depth - it->end; // For indels
62  }
63  }
64  t.nuc = "";
65  t.q = "";
66  for (uint16_t i = 0; i < max_l; ++i){
67  n = '*';
68  q = 0;
69  tq = 0;
70  max_depth = 0;
71  tmp_depth = 0;
72  cur_depth = 0;
73  // prev_depth = 0;
74  ctr = 1;
75  it = ad.begin();
76  gap_depth = 0;
77  nuc_pos.clear();
78  while(it!=ad.end()){
79  if(!(i < it->nuc.length())){
80  it++;
81  continue;
82  }
83  if(it->nuc[i] == '+' || it->nuc[i] == '-'){
84  it++;
85  continue;
86  }
87  tmp_depth = it->depth;
88  tq = it->mean_qual;
89  ctr = 1;
90  while(it!=ad.end()-1 && i < (it+1)->nuc.length() && it->nuc[i] == (it+1)->nuc[i]){ // Third condition for cases with more than 1 base in insertion && (i+1) < (it+1)->nuc.length()
91  tmp_depth += (it+1)->depth;
92  tq = ((tq * (ctr)) + (it+1)->mean_qual)/(ctr + 1);
93  it++;
94  ctr++;
95  }
96  cur_depth += tmp_depth;
97  tmp_a.nuc = it->nuc[i];
98  tmp_a.mean_qual = tq;
99  tmp_a.reverse = 0; // Reverse reads not important for consensus
100  tmp_a.depth = tmp_depth;
101  nuc_pos.push_back(tmp_a);
102  // if(tmp_depth > max_depth){
103  // n = it->nuc[i];
104  // max_depth = tmp_depth;
105  // q = tq;
106  // ambg_n = 1;
107  // } else if(tmp_depth == max_depth){
108  // n = gt2iupac(n, it->nuc[i]);
109  // q = ((q * ambg_n) + tq)/(ambg_n+1);
110  // ambg_n += 1;
111  // }
112  it++;
113  }
114  // Sort nuc_pos by depth of alleles
115  std::sort(nuc_pos.begin(), nuc_pos.end(), compare_allele_depth);
116  it=nuc_pos.begin();
117  n = it->nuc[0];
118  q = it->mean_qual;
119  max_depth = it->depth;
120  it++;
121  ambg_n = 1;
122  if(i > 0){
123  cur_threshold = threshold * (double) total_indel_depth;
124  } else {
125  cur_threshold = threshold * (double)total_max_depth;
126  }
127  while(it!=nuc_pos.end() && (max_depth < cur_threshold || it->depth == (it -1)->depth)){ // Iterate till end or till cross threshold and no more allele with same depth
128  n = gt2iupac(n, it->nuc[0]);
129  q = ((q * ambg_n) + it->mean_qual)/(ambg_n+1);
130  ambg_n += 1;
131  max_depth += it->depth;
132  it++;
133  }
134  if(max_depth < cur_threshold) // If depth still less than threshold
135  n = 'N';
136  if(i > 0)
137  gap_depth = (total_indel_depth > cur_depth) ? total_indel_depth - cur_depth : 0;
138  else
139  gap_depth = 0; // For first position of allele
140  if(n!='*' && max_depth >= gap_depth){ // TODO: Check what to do when equal.{
141  t.nuc += n;
142  q += 0.5; // For rounding before converting to int
143  t.q += (((uint8_t)q)+33);
144  }
145  }
146  return t;
147 }
148 
149 int call_consensus_from_plup(std::istream &cin, std::string out_file, uint8_t min_qual, double threshold){
150  std::string line, cell;
151  std::ofstream fout(out_file+".fa");
152  std::ofstream tmp_qout(out_file+".qual.txt");
153  fout << ">Consensus" << "_" << threshold <<std::endl;
154  int ctr = 0, pos = 0, mdepth = 0;
155  std::stringstream lineStream;
156  char ref;
157  std::string bases;
158  std::string qualities;
159  std::vector<allele> ad;
160  while (std::getline(cin, line)){
161  lineStream << line;
162  ctr = 0;
163  while(std::getline(lineStream,cell,'\t')){
164  switch(ctr){
165  case 0:
166  break;
167  case 1:
168  pos = stoi(cell);
169  break;
170  case 2:
171  ref = cell[0];
172  break;
173  case 3:
174  mdepth = stoi(cell);
175  break;
176  case 4:
177  bases = cell;
178  break;
179  case 5:
180  qualities = cell;
181  break;
182  case 6:
183  break;
184  }
185  ctr++;
186  }
187  ad = update_allele_depth(ref, bases, qualities, min_qual);
188  ret_t t = get_consensus_allele(ad, min_qual, threshold);
189  fout << t.nuc;
190  tmp_qout << t.q;
191  lineStream.clear();
192  ad.clear();
193  }
194  tmp_qout.close();
195  fout.close();
196  return 0;
197 }
198 
199 // int main_old(int argc, char* argv[]) {
200 // std::string line, cell;
201 // std::string out_file = argv[1];
202 // uint8_t min_qual = 20;
203 // if(argc > 1){
204 // min_qual = atoi(argv[2]);
205 // }
206 // std::ofstream fout(out_file+".fa");
207 // fout << ">Consensus"<<std::endl;
208 // int ctr = 0, pos = 0, mdepth = 0;
209 // std::stringstream lineStream;
210 // char ref;
211 // std::string bases;
212 // std::string qualities;
213 // std::vector<allele> ad;
214 // while (std::getline(std::cin, line)){
215 // lineStream << line;
216 // ctr = 0;
217 // while(std::getline(lineStream,cell,'\t')){
218 // switch(ctr){
219 // case 0:
220 // break;
221 // case 1:
222 // pos = stoi(cell);
223 // break;
224 // case 2:
225 // ref = cell[0];
226 // break;
227 // case 3:
228 // mdepth = stoi(cell);
229 // break;
230 // case 4:
231 // bases = cell;
232 // break;
233 // case 5:
234 // qualities = cell;
235 // break;
236 // case 6:
237 // break;
238 // }
239 // ctr++;
240 // }
241 // ad = update_allele_depth(ref, bases, qualities, min_qual);
242 // // print_allele_depths(ad);
243 // fout << get_consensus_allele(ad);
244 // // std::cout << std::endl << "Consensus: " << get_consensus_allele(ad) << std::endl;
245 // lineStream.clear();
246 // }
247 // fout.close();
248 // return 0;
249 // }