iVar
remove_reads_from_amplicon.cpp
1 #include "primer_bed.h"
2 #include "htslib/sam.h"
3 #include "htslib/bgzf.h"
4 
5 #include<iostream>
6 
7 int rmv_reads_from_amplicon(std::string bam, std::string region_, std::string bam_out, uint16_t* amplicon, int amp_n){
8  for (int i = 0; i < amp_n; ++i){
9  std::cout << "Amplicon: " << amplicon[i] << " ";
10  }
11  bam_out += ".bam";
12  std::cout << std::endl;
13  std::cout << bam_out << std::endl;
14  if(bam.empty()){
15  std::cout << "Bam in empty" << std::endl;
16  return 0;
17  }
18  //open BAM for reading
19  samFile *in = hts_open(bam.c_str(), "r");
20  BGZF *out = bgzf_open(bam_out.c_str(), "w");
21  if(in == NULL) {
22  std::cout << ("Unable to open BAM/SAM file.") << std::endl;
23  return -1;
24  }
25  //Load the index
26  hts_idx_t *idx = sam_index_load(in, bam.c_str());
27  if(idx == NULL) {
28  std::cout << ("Unable to open BAM/SAM index.") << std::endl; // TODO: Generate index
29  return -1;
30  }
31  //Get the header
32  bam_hdr_t *header = sam_hdr_read(in);
33  if(bam_hdr_write(out, header) < 0){
34  std::cout << "Unable to write BAM header to path." << std::endl;
35  sam_close(in);
36  return -1;
37  }
38  if(header == NULL) {
39  sam_close(in);
40  std::cout << "Unable to open BAM/SAM header." << std::endl;
41  }
42  if (region_.empty()){
43  std::cout << "Number of references: " << header->n_targets << std::endl;
44  for (int i = 0; i < header->n_targets; ++i){
45  std::cout << "Reference Name: " << header->target_name[i] << std::endl;
46  std::cout << "Reference Length: " << header->target_len[i] << std::endl;
47  if(i==0){
48  region_.assign(header->target_name[i]);
49  }
50  }
51  std::cout << "Using Region: " << region_ << std::endl;
52  }
53  std::string temp(header->text);
54  std::string sortFlag ("SO:coordinate");
55  if (temp.find(sortFlag)) {
56  std::cout << "Sorted By Coordinate" << std::endl; // Sort by coordinate
57  } else {
58  std::cout << "Not sorted" << std::endl;
59  }
60  //Initialize iterator
61  hts_itr_t *iter = NULL;
62  //Move the iterator to the region we are interested in
63  iter = sam_itr_querys(idx, header, region_.c_str());
64  if(header == NULL || iter == NULL) {
65  sam_close(in);
66  std::cout << "Unable to iterate to region within BAM/SAM." << std::endl;
67  return -1;
68  }
69  //Initiate the alignment record
70  bam1_t *aln = bam_init1();
71  int ctr = 0;
72  bool w;
73  while(sam_itr_next(in, iter, aln) >= 0) {
74  uint8_t* a = bam_aux_get(aln, "xa");
75  w = true;
76  if(a != 0){
77  for (int i = 0; i < amp_n; ++i){
78  if(bam_aux2i(a) == amplicon[i]){
79  w = false;
80  }
81  }
82  }
83  if(w){
84  if(bam_write1(out, aln) < 0){
85  std::cout << "Not able to write to BAM" << std::endl;
86  hts_itr_destroy(iter);
87  hts_idx_destroy(idx);
88  bam_destroy1(aln);
89  bam_hdr_destroy(header);
90  sam_close(in);
91  bgzf_close(out);
92  return -1;
93  };
94  }
95  ctr++;
96  if(ctr % 100000 == 0){
97  std::cout << ctr << std::endl;
98  }
99  }
100  hts_itr_destroy(iter);
101  hts_idx_destroy(idx);
102  bam_destroy1(aln);
103  bam_hdr_destroy(header);
104  sam_close(in);
105  bgzf_close(out);
106  return 0;
107 }
108 
109 // int main_old(int argc, char* argv[]){
110 // std::string bam = std::string(argv[1]);
111 // std::string region_;
112 // std::string bam_out = std::string(argv[2]);
113 // argc -= 3;
114 // std::cout << argc << std::endl;
115 // int16_t amplicon[argc];
116 // for (int i = 0; i < argc; ++i){
117 // amplicon[i] = atoi(argv[i+3]); // TODO: Convert to int16_t
118 // }
119 // for (int i = 0; i < sizeof(amplicon)/sizeof(*amplicon); ++i){
120 // std::cout << "Amplicon: " << amplicon[i] << " ";
121 // }
122 // std::cout << std::endl;
123 // std::cout << bam_out << std::endl;
124 // if(!bam.empty()) {
125 // //open BAM for reading
126 // samFile *in = hts_open(bam.c_str(), "r");
127 // BGZF *out = bgzf_open(bam_out.c_str(), "w");
128 // if(in == NULL) {
129 // throw std::runtime_error("Unable to open BAM/SAM file.");
130 // }
131 // //Load the index
132 // hts_idx_t *idx = sam_index_load(in, bam.c_str());
133 // if(idx == NULL) {
134 // throw std::runtime_error("Unable to open BAM/SAM index."); // Generate index
135 // }
136 // //Get the header
137 // bam_hdr_t *header = sam_hdr_read(in);
138 // bam_hdr_write(out, header);
139 // if(header == NULL) {
140 // sam_close(in);
141 // throw std::runtime_error("Unable to open BAM header.");
142 // }
143 // if (region_.empty()){
144 // std::cout << "Number of references: " << header->n_targets << std::endl;
145 // for (int i = 0; i < header->n_targets; ++i){
146 // std::cout << "Reference Name: " << header->target_name[i] << std::endl;
147 // std::cout << "Reference Length: " << header->target_len[i] << std::endl;
148 // if(i==0){
149 // region_.assign(header->target_name[i]);
150 // }
151 // }
152 // std::cout << "Using Region: " << region_ << std::endl;
153 // }
154 // std::string temp(header->text);
155 // std::string sortFlag ("SO:coordinate");
156 // if (temp.find(sortFlag)) {
157 // std::cout << "Sorted By Coordinate" << std::endl; // Sort by coordinate
158 // } else {
159 // std::cout << "Not sorted" << std::endl;
160 // }
161 // //Initialize iterator
162 // hts_itr_t *iter = NULL;
163 // //Move the iterator to the region we are interested in
164 // iter = sam_itr_querys(idx, header, region_.c_str());
165 // if(header == NULL || iter == NULL) {
166 // sam_close(in);
167 // throw std::runtime_error("Unable to iterate to region within BAM.");
168 // }
169 // //Initiate the alignment record
170 // bam1_t *aln = bam_init1();
171 // int ctr = 0;
172 // bool w;
173 // while(sam_itr_next(in, iter, aln) >= 0) {
174 // // std::cout << "Query Length: " << bam_cigar2qlen(aln->core.n_cigar, cigar) << std::endl;
175 // // std::cout << "Read Length: " << bam_cigar2rlen(aln->core.n_cigar, cigar) << std::endl;
176 // // std::cout << "Query Start: " << get_query_start(aln) << std::endl;
177 // // std::cout << "Query End: " << get_query_end(aln) << std::endl
178 // uint8_t* a = bam_aux_get(aln, "xa");
179 // w = true;
180 // for (int i = 0; i < sizeof(amplicon)/sizeof(*amplicon); ++i){
181 // if(bam_aux2i(a) == amplicon[i]){
182 // w = false;
183 // }
184 // }
185 // if(w)
186 // bam_write1(out, aln);
187 // ctr++;
188 // if(ctr % 100000 == 0){
189 // std::cout << ctr << std::endl;
190 // }
191 // }
192 // hts_itr_destroy(iter);
193 // hts_idx_destroy(idx);
194 // bam_destroy1(aln);
195 // bam_hdr_destroy(header);
196 // sam_close(in);
197 // bgzf_close(out);
198 // }
199 // return 0;
200 // }