iVar
allele_functions.cpp
1 #include<string>
2 #include<vector>
3 #include<algorithm>
4 #include<iostream>
5 
6 #include "allele_functions.h"
7 
8 void print_allele_depths(std::vector<allele> ad){
9  std::cout << "AD Size: " << ad.size() << " " << std::endl;
10  for(std::vector<allele>::iterator it = ad.begin(); it != ad.end(); ++it) {
11  std::cout << "Nuc: " << it->nuc << std::endl;;
12  std::cout << "Depth: " << it->depth << std::endl;
13  std::cout << "Reverse: " << it->reverse <<std::endl;
14  std::cout << "Qual: " << (uint16_t) it->mean_qual << std::endl;
15  std::cout << "Beg: " << it->beg << std::endl;
16  std::cout << "End: " << it->end << std::endl;
17  }
18 }
19 
20 int check_allele_exists(std::string n, std::vector<allele> ad){
21  for(std::vector<allele>::iterator it = ad.begin(); it != ad.end(); ++it) {
22  if(it->nuc.compare(n) == 0){
23  return it - ad.begin();
24  }
25  }
26  return -1;
27 }
28 
29 int find_ref_in_allele(std::vector<allele> ad, char ref){
30  std::string ref_s(1, ref);
31  std::vector<allele>::iterator it = ad.begin();
32  while(it < ad.end()){
33  if(it->nuc.compare(ref_s) == 0)
34  return (it - ad.begin());
35  it++;
36  }
37  return -1;
38 }
39 
40 std::vector<allele> update_allele_depth(char ref,std::string bases, std::string qualities, uint8_t min_qual){
41  std::vector<allele> ad;
42  std::string indel;
43  int i = 0, n =0, j = 0, q_ind = 0;
44  bool beg, end;
45  uint8_t q;
46  float num = 0, den = 0;
47  while (i < bases.length()){
48  beg = false;
49  end = false;
50  if(bases[i] == '^'){
51  i += 2; // Skip mapping quality as well (i+1) - 33
52  continue;
53  }
54  if(bases[i] == '$'){
55  i++;
56  continue;
57  }
58  q = qualities[q_ind] - 33;
59  std::string b;
60  allele tmp;
61  bool forward= true;
62  switch(bases[i]){
63  case '.':
64  b = ref;
65  end = (bases[i+1] == '$');
66  beg = (bases[i+1] == '^');
67  break;
68  case ',':
69  b = ref;
70  forward = false;
71  end = (bases[i+1] == '$');
72  beg = (bases[i+1] == '^');
73  break;
74  case '*':
75  b = bases[i];
76  break;
77  case '+': case '-':
78  j = i+1;
79  while(isdigit(bases[j])){
80  j++;
81  }
82  j = j - (i+1);
83  n = stoi(bases.substr(i+1, j));
84  indel = bases.substr(i+1+j, n);
85  transform(indel.begin(), indel.end(), indel.begin(),::toupper);
86  b = bases[i] + indel; // + for Insertion and - for Deletion
87  i += n + j;
88  if(indel[0]>=97 && indel[0] <= 122)
89  forward=false;
90  q = min_qual; // For insertions and deletion ust use minimum quality.
91  break;
92  default:
93  int asc_val = bases[i];
94  if(asc_val >= 65 && asc_val <= 90){
95  b = bases[i];
96  } else if(bases[i]>=97 && bases[i]<=122){
97  b = bases[i] - ('a' - 'A');
98  forward = false;
99  }
100  end = (bases[i+1] == '$');
101  beg = (bases[i+1] == '^');
102  }
103  int ind = check_allele_exists(b, ad);
104  if(q >= min_qual){
105  if (ind==-1){
106  tmp.nuc = b;
107  tmp.depth = 1;
108  tmp.tmp_mean_qual = q;
109  if(!forward)
110  tmp.reverse = 1;
111  else
112  tmp.reverse = 0;
113  if(beg)
114  tmp.beg = 1;
115  else
116  tmp.beg = 0;
117  if(end)
118  tmp.end = 1;
119  else
120  tmp.end = 0;
121  ad.push_back(tmp);
122  } else {
123  ad.at(ind).tmp_mean_qual = (ad.at(ind).tmp_mean_qual * ad.at(ind).depth + q)/(ad.at(ind).depth + 1);
124  ad.at(ind).depth += 1;
125  if(beg)
126  ad.at(ind).beg += 1;
127  if(end)
128  ad.at(ind).end += 1;
129  if(!forward)
130  ad.at(ind).reverse += 1;
131  }
132  }
133  i++;
134  if(b[0] !='+' && b[0]!='-')
135  q_ind++;
136  }
137  for(std::vector<allele>::iterator it = ad.begin(); it!=ad.end(); ++it){
138  it->mean_qual = (uint8_t) it->tmp_mean_qual;
139  }
140  if(ad.size() > 0)
141  std::sort(ad.begin(), ad.end());
142  return ad;
143 }
144 
145 int get_index(char a){
146  switch(a){
147  case'Y':
148  a = 0;
149  break;
150  case'R':
151  a = 1;
152  break;
153  case'W':
154  a = 2;
155  break;
156  case'S':
157  a = 3;
158  break;
159  case'K':
160  a = 4;
161  break;
162  case'M':
163  a = 5;
164  break;
165  case'A':
166  a = 6;
167  break;
168  case'T':
169  a = 7;
170  break;
171  case'G':
172  a = 8;
173  break;
174  case'C':
175  a = 9;
176  break;
177  case'D':
178  a = 10;
179  break;
180  case'V':
181  a = 11;
182  break;
183  case'H':
184  a = 12;
185  break;
186  case'B':
187  a = 13;
188  break;
189  default:
190  a = -1;
191  }
192  return (int) a;
193 }
194 
195 // Modified gt2iupac from bcftools.h. Expanded iupac matrix to include all ambigious nucleotides(added Y, R, W, S, K, M, A, T, G, C, D, V, H, B) - https://github.com/samtools/bcftools/blob/b0376dff1ed70603c9490802f37883b9009215d2/bcftools.h#L48
196 /*
197 Expanded IUPAC matrix:
198 
199 Y N H B B H H Y B Y N N H B
200 N R D V D V R D R V D V N N
201 H D W N D H W W D H D N H N
202 B V N S B V V B S S N V N B
203 B D D B K N D K K B D N N B
204 H V H V N M M H V M N V H N
205 H R W V D M A W R M D V H N
206 Y D W B K H W T K Y D N H B
207 B R D S K V R K G S D V N B
208 Y V H S B M M Y S C N V H B
209 N D D N D N D D D N D N N N
210 N V N V N V V N V V N V N N
211 H N H N N H H H N H N N H N
212 B N N B B N N B B B N N N B
213 
214  */
215 char gt2iupac(char a, char b){
216  if(a == '*' || b == '*')
217  return 'N';
218  static const char iupac[14][14] = {
219  {'Y', 'N', 'H', 'B', 'B', 'H', 'H', 'Y', 'B', 'Y', 'N', 'N', 'H', 'B'},
220  {'N', 'R', 'D', 'V', 'D', 'V', 'R', 'D', 'R', 'V', 'D', 'V', 'N', 'N'},
221  {'H', 'D', 'W', 'N', 'D', 'H', 'W', 'W', 'D', 'H', 'D', 'N', 'H', 'N'},
222  {'B', 'V', 'N', 'S', 'B', 'V', 'V', 'B', 'S', 'S', 'N', 'V', 'N', 'B'},
223  {'B', 'D', 'D', 'B', 'K', 'N', 'D', 'K', 'K', 'B', 'D', 'N', 'N', 'B'},
224  {'H', 'V', 'H', 'V', 'N', 'M', 'M', 'H', 'V', 'M', 'N', 'V', 'H', 'N'},
225  {'H', 'R', 'W', 'V', 'D', 'M', 'A', 'W', 'R', 'M', 'D', 'V', 'H', 'N'},
226  {'Y', 'D', 'W', 'B', 'K', 'H', 'W', 'T', 'K', 'Y', 'D', 'N', 'H', 'B'},
227  {'B', 'R', 'D', 'S', 'K', 'V', 'R', 'K', 'G', 'S', 'D', 'V', 'N', 'B'},
228  {'Y', 'V', 'H', 'S', 'B', 'M', 'M', 'Y', 'S', 'C', 'N', 'V', 'H', 'B'},
229  {'N', 'D', 'D', 'N', 'D', 'N', 'D', 'D', 'D', 'N', 'D', 'N', 'N', 'N'},
230  {'N', 'V', 'N', 'V', 'N', 'V', 'V', 'N', 'V', 'V', 'N', 'V', 'N', 'N'},
231  {'H', 'N', 'H', 'N', 'N', 'H', 'H', 'H', 'N', 'H', 'N', 'N', 'H', 'N'},
232  {'B', 'N', 'N', 'B', 'B', 'N', 'N', 'B', 'B', 'B', 'N', 'N', 'N', 'B'}
233  };
234  if ( a>='a' ) a -= 'a' - 'A';
235  if ( b>='a' ) b -= 'a' - 'A';
236  int _a, _b;
237  _a = get_index(a);
238  _b = get_index(b);
239  if(_a == -1 || _b == -1)
240  return 'N';
241  return iupac[_a][_b];
242 }