1 #include "htslib/vcf.h" 11 #define GET_MAX_INDICES(type_t, pu, no, m) { \ 13 type_t *p = (type_t*) (pu); \ 14 for (int k = 0; k < no; ++k){ \ 19 } else if(p[k] == v) { \ 31 static inline char gt2iupac(
char a,
char b)
33 static const char iupac[4][4] = { {
'A',
'M',
'R',
'W'},{
'M',
'C',
'S',
'Y'},{
'R',
'S',
'G',
'K'},{
'W',
'Y',
'K',
'T'} };
34 if ( a>=
'a' ) a -=
'a' -
'A';
35 if ( b>=
'a' ) b -=
'a' -
'A';
37 else if ( a==
'C' ) a = 1;
38 else if ( a==
'G' ) a = 2;
39 else if ( a==
'T' ) a = 3;
42 else if ( b==
'C' ) b = 1;
43 else if ( b==
'G' ) b = 2;
44 else if ( b==
'T' ) b = 3;
46 return iupac[(int)a][(
int)b];
49 void get_consensus_indel(
char **
allele,
maj m,
char *&nuc){
50 int max_len = 0, l = 0;
52 for (
int k = 0; k <= m.n; ++k){
53 if(strlen(
allele[m.ind[k]]) > max_len)
54 max_len = strlen(
allele[m.ind[k]]);
57 for (
int j = 0; j < max_len; ++j){
59 for (
int k = 0; k <= m.n ; ++k){
60 if(j < strlen(
allele[m.ind[k]])){
64 n = gt2iupac(n,
allele[m.ind[k]][j]);
73 if(l % INDEL_SIZE == 0){
74 nuc = (
char*) realloc(nuc, INDEL_SIZE * ((l/INDEL_SIZE) + 1) *
sizeof(char));
80 int main_old(
int argc,
char* argv[]) {
81 std::cout <<
"Path " << argv[1] <<std::endl;
82 std::string bcf = std::string(argv[1]);
84 std::string out_path = std::string(argv[2]);
86 region_ = std::string(argv[3]);
90 htsFile *in = bcf_open(bcf.c_str(),
"r");
91 std::ofstream out(out_path);
92 out <<
">" << out_path <<
"\n";
94 throw std::runtime_error(
"Unable to open BCF/VCF file.");
98 bcf_hdr_t *header = bcf_hdr_read(in);
101 throw std::runtime_error(
"Unable to open BCF header.");
103 ad_id = bcf_hdr_id2int(header, BCF_DT_ID,
"AD");
104 std::cout << ad_id <<
" " << std::endl;
106 bcf1_t *v = bcf_init();
107 int ctr = 0, prev_pos = 0, s, d;
109 char *nuc =(
char *) malloc(INDEL_SIZE *
sizeof(
char)), *r;
110 while(bcf_read(in, header, v) ==0){
113 for (
int i = prev_pos; i < v->pos - 1; ++i){
116 bcf_unpack(v, BCF_UN_FMT);
117 bcf_unpack(v, BCF_UN_STR);
118 bcf_fmt_t *fmt = v->d.fmt;
119 for (
int i = 0; i < v->n_fmt; ++i){
120 if(strcmp(header->id[BCF_DT_ID][fmt[i].id].key,
"AD") == 0){
121 m.ind = (
int *) malloc(fmt[i].n *
sizeof(
int));
124 case BCF_BT_INT8: GET_MAX_INDICES(int8_t, fmt[i].p, fmt[i].n, m);
break;
125 case BCF_BT_INT16: GET_MAX_INDICES(int16_t, fmt[i].p, fmt[i].n, m);
break;
126 case BCF_BT_INT32: GET_MAX_INDICES(int32_t, fmt[i].p, fmt[i].n, m);
break;
127 case BCF_BT_FLOAT: GET_MAX_INDICES(
float, fmt[i].p, fmt[i].n, m);
break;
128 default: hts_log_error(
"Unexpected type %d", fmt[i].type); exit(1);
133 if(bcf_get_variant_types(v) == VCF_REF){
134 out << v->d.allele[0];
135 }
else if(bcf_get_variant_types(v) == VCF_SNP || bcf_get_variant_types(v) == VCF_MNP){
136 nuc =(
char *) malloc(INDEL_SIZE *
sizeof(
char));
137 get_consensus_indel(v->d.allele, m, nuc);
140 nuc =(
char *) malloc(INDEL_SIZE *
sizeof(
char));
141 get_consensus_indel(v->d.allele, m, nuc);
142 s = strlen(v->d.allele[0]) - strlen(nuc);
143 d = (s > 0) ? strlen(v->d.allele[0]) : strlen(nuc);
144 r = (s > 0) ? v->d.allele[0] : nuc;
145 s = (s < 0) ? -1 * s : s;
147 std::cout <<
"Number: " << m.n <<
" Maj Allele: " << v->d.allele[m.ind[0]] << strlen(nuc) <<
" - Nuc Length" << nuc <<
" " << s <<
" " << d << std::endl;
148 for (
int i = 0; i < s; ++i){
161 std::cout << ctr << std::endl;
165 bcf_hdr_destroy(header);