iVar
alignment.cpp
1 #include<iostream>
2 #include <algorithm>
3 #include<vector>
4 #include<fstream>
5 
6 #include "alignment.h"
7 
8 /* Substitution Matrix
9 
10  A T G C N
11 A 1 -1 -1 -1 0
12 T -1 1 -1 -1 0
13 G -1 -1 1 -1 0
14 C -1 -1 -1 1 0
15 N 0 0 0 0 0
16 */
17 
18 int get_sub_score(char a, char b){
19  int i,j;
20  switch(a){
21  case 'A':
22  i = 0;
23  break;
24  case 'T':
25  i = 1;
26  break;
27  case 'G':
28  i = 2;
29  break;
30  case 'C':
31  i = 3;
32  break;
33  case 'N':
34  i = 4;
35  break;
36  }
37  switch(b){
38  case 'A':
39  j = 0;
40  break;
41  case 'T':
42  j = 1;
43  break;
44  case 'G':
45  j = 2;
46  break;
47  case 'C':
48  j = 3;
49  break;
50  case 'N':
51  i = 4;
52  break;
53  }
54  return substitution[i][j];
55 }
56 
57 int get_gap_penalty(int k, char a){
58  if(a == 'N') // Allow for i7 and i5 indexesto be specified as NNNNNNNN
59  return 0;
60  return k * gap_open; // Linear Gap Penalty
61 }
62 
63 void print_matrix(int h[][max_adapter_size], int r, int c, std::string read, std::string adap){
64  for(int i =0; i< r;i++){
65  if(i == 0){
66  std::cout << " ";
67  for(int k =0; k < c-1; k++) std::cout << adap[k] << " ";
68  std::cout << "\n";
69  }
70  for(int j = 0; j < c;j++){
71  if(j == 0 && i > 0){
72  if(i -1 < read.length())
73  std::cout << read[i-1] << " ";
74  }
75  if(i == 0 && j == 0)
76  std::cout << " " << " ";
77  std::cout << h[i][j] << " ";
78  }
79  std::cout << "\n";
80  }
81 }
82 
83 // Return Value 1 - Diag, 2->Right, 3->Down, 0-> 0
84 int* get_score_cell(int h[][max_adapter_size], int i, int j, std::string read, std::string adap){
85  int s[] = {0,0,0};
86  int *tmp = new int[2];
87  tmp[0] = 0;
88  tmp[1] = 0;
89  s[0] = h[i-1][j-1] + get_sub_score(read[i-1], adap[j-1]);
90  s[1] = h[i-1][j] - get_gap_penalty(1, adap[j-1]);
91  s[2] = h[i][j-1] - get_gap_penalty(1, read[i-1]);
92  int max = (s[0]>=s[1]) ? s[0] : s[1];
93  max = (max>=s[2]) ? max : s[2];
94  max = (max > 0) ? max : 0;
95  tmp[0] = max;
96  if(max == s[0]){
97  tmp[1] = 1;
98  } else if(max == s[1]){
99  tmp[1] = 3;
100  } else if(max == s[2]){
101  tmp[1] = 2;
102  }
103  return tmp;
104 }
105 
106 void print_alignment(char a[2][max_read_size], int n){
107  std::cout << "Alignment: " << std::endl << std::endl;
108  for(int j =0;j<2;j++){
109  if(j == 0)
110  std::cout << "Read: ";
111  else if(j == 1)
112  std::cout << "Adap: ";
113  for(int i = n -1; i >= 0;i--){
114  std::cout << a[j][i] << " ";
115  }
116  std::cout << std::endl;
117  }
118 }
119 
120 int* align_seqs(std::string read, std::string adap){
121  int h[max_read_size][max_adapter_size],
122  t[max_read_size][max_adapter_size],
123  m = read.length() + 1,
124  n=adap.length()+ 1,
125  max_i = 0,
126  max_j = 0,
127  max_v = -1,
128  max_score,
129  *rt = new int[2],
130  *tmp = new int[2];
131  rt[0] = -1;
132  rt[1] = read.length();
133  // Initialize first column to 0
134  for(int i = 0; i < n; i++) h[0][i] = 0;
135  // Initialize first row to 0
136  for(int i = 0; i < m; i++) h[i][0] = 0;
137  // print_matrix(h, m, n, read, adap);
138  for(int i = 1; i < m; i++){
139  for(int j = 1; j < n; j++){
140  tmp = get_score_cell(h, i, j, read, adap);
141  h[i][j] = *tmp;
142  t[i][j] = *(tmp+1);
143  if(*tmp >= max_v){
144  max_i = i;
145  max_j = j;
146  max_v = *tmp;
147  }
148  }
149  }
150  max_score = max_v;
151  // if(max_i != read.length()) // Ensure alignment to end of read.
152  // return rt;
153  // std::cout << "Score: " << max_v << " ";
154  // print_matrix(h, m, n, read, adap);
155  // print_matrix(t, m, n, read, adap);
156  // Traceback
157  int _t, _l, _d, _align_n = 0;
158  char _align[2][max_read_size];
159  // 1 - Diag, 2->Right, 3->Down
160  while(max_v != 0){
161  _d = h[max_i-1][max_j-1];
162  _l = h[max_i][max_j-1];
163  _t = h[max_i-1][max_j];
164  switch(t[max_i][max_j]){
165  case 1:
166  max_v = _d;
167  max_i = max_i -1;
168  max_j = max_j - 1;
169  _align[0][_align_n] = read[max_i];
170  _align[1][_align_n] = adap[max_j];
171  _align_n++;
172  break;
173  case 2:
174  max_v = _l;
175  max_j = max_j - 1;
176  _align[0][_align_n] = '-';
177  _align[1][_align_n] = adap[max_j];
178  _align_n++;
179  break;
180  case 3:
181  max_v = _t;
182  max_i = max_i - 1;
183  _align[0][_align_n] = read[max_i];
184  _align[1][_align_n] = '-';
185  _align_n++;
186  break;
187  case 0:
188  max_v = 0;
189  break;
190  }
191  }
192  rt[1] = max_i;
193  print_alignment(_align, _align_n);
194  if(max_score <= ((_align_n - 2) * unit_score) - get_gap_penalty(2, 'A') || max_j != 0) // If alignment does not start at beginning of adapter
195  return rt;
196  rt[0] = max_score;
197  // print_alignment(_align, _align_n);
198  return rt;
199 }
200 
201 // int find_adapters_contaminants(std::istream &cin, std::string adp_cntms_file){
202 // std::vector<std::string> adp = read_adapters_from_fasta(adp_cntms_file, "Read1");
203 // std::string line, bases, qual, b;
204 // int ctr = 0, score, *t = new int[2];
205 // while (std::getline(cin, line)){
206 // if(ctr % 400000 == 0)
207 // std::cout << "Processed " << (ctr/4) << " reads ..." << std::endl;
208 // if(ctr % 2 == 0){
209 // ctr++;
210 // continue;
211 // }
212 // if((ctr+1) % 2 == 0 && (ctr + 1) % 4 == 0 ){
213 // qual = line;
214 // // std::cout << bases << std::endl;
215 // // std::cout << qual << std::endl;
216 // score = -1;
217 // for(std::vector<std::string>::iterator it = adp.begin(); it != adp.end() && score == -1; ++it) {
218 // b = ((*it).length() > bases.length()) ? bases : bases.substr(bases.length() - (*it).length(), (*it).length());
219 // t = align_seqs(b, *it);
220 // score = *t;
221 // if(score != -1)
222 // std::cout << "Trimmed: " << bases.substr(0, bases.length() - *(t+1)) << std::endl;
223 // }
224 // } else {
225 // bases = line;
226 // }
227 // ctr++;
228 // }
229 // return 0;
230 // }
231 
232 // int main(int argc, char* argv[]){
233 // std::string adp = "../data/adapters/NexteraPR.fa";
234 // // clock_t begin = clock();
235 // find_adapters_contaminants(std::cin, adp);
236 // // clock_t end = clock();
237 // // double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;
238 // // std::cout << "Time Taken: " << elapsed_secs << std::endl;
239 // return 0;
240 // }