File: tabix.cpp

package info (click to toggle)
libtabixpp 1.1.0-4
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 384 kB
  • sloc: cpp: 194; makefile: 45; sh: 16
file content (146 lines) | stat: -rw-r--r-- 4,359 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#include "tabix.hpp"

Tabix::Tabix(void) { }

Tabix::Tabix(string& file) {
    has_jumped = false;
    filename = file;
    str.l = 0;
    str.m = 0;
    str.s = NULL;
    const char* cfilename = file.c_str();
    struct stat stat_tbi,stat_vcf;
    char *fnidx = (char*) calloc(strlen(cfilename) + 5, 1);
    strcat(strcpy(fnidx, cfilename), ".tbi");
    if ( bgzf_is_bgzf(cfilename)!=1 )
    {
        cerr << "[tabix++] was bgzip used to compress this file? " << file << endl;
        free(fnidx);
        exit(1);
    }
    // Common source of errors: new VCF is used with an old index
    stat(fnidx, &stat_tbi);
    stat(cfilename, &stat_vcf);
    if ( stat_vcf.st_mtime > stat_tbi.st_mtime )
    {
        cerr << "[tabix++] the index file is older than the vcf file. Please use '-f' to overwrite or reindex." << endl;
        free(fnidx);
        exit(1);
    }
    free(fnidx);

    if ((fn = hts_open(cfilename, "r")) == 0) {
        cerr << "[tabix++] fail to open the data file." << endl;
        exit(1);
    }

    if ((tbx = tbx_index_load(cfilename)) == NULL) {
        cerr << "[tabix++] failed to load the index file." << endl;
        exit(1);
    }

    int nseq;
    const char** seq = tbx_seqnames(tbx, &nseq);
    for (int i=0; i<nseq; i++) {
        chroms.push_back(seq[i]);
    }
    free(seq);

    idxconf = &tbx_conf_vcf;

    // set up the iterator, defaults to the beginning
    current_chrom = chroms.begin();
    iter = tbx_itr_querys(tbx, (current_chrom != chroms.end() ? current_chrom->c_str() : ""));

}

Tabix::~Tabix(void) {
    tbx_itr_destroy(iter);
    tbx_destroy(tbx);
    free(str.s);
}

const kstring_t * Tabix::getKstringPtr(){
    return &str;
}

void Tabix::getHeader(string& header) {
    header.clear();
    while ( hts_getline(fn, KS_SEP_LINE, &str) >= 0 ) {
        if ( !str.l || str.s[0]!=tbx->conf.meta_char ) {
            break;
        } else {
            header += string(str.s);
            header += "\n";
        }
    }
    // set back to start
    current_chrom = chroms.begin();
    if (iter) tbx_itr_destroy(iter);
    iter = tbx_itr_querys(tbx, (current_chrom != chroms.end() ? current_chrom->c_str() : ""));
}

bool Tabix::setRegion(string& region) {
    tbx_itr_destroy(iter);
    iter = tbx_itr_querys(tbx, region.c_str());
    has_jumped = true;
    return true;
}

bool Tabix::getNextLine(string& line) {
    if (has_jumped) {
        if (iter && tbx_itr_next(fn, tbx, iter, &str) >= 0) {
            line = string(str.s);
            return true;
        } else return false;
    } else { // step through all sequences in the file
        // we've never jumped, so read everything
        if (iter && tbx_itr_next(fn, tbx, iter, &str) >= 0) {
            line = string(str.s);
            return true;
        } else {
            // While we aren't at the end, advance. While we're still not at the end...
            while (current_chrom != chroms.end() && ++current_chrom != chroms.end()) {
                tbx_itr_destroy(iter);
                iter = tbx_itr_querys(tbx, current_chrom->c_str());
                if (iter && tbx_itr_next(fn, tbx, iter, &str) >= 0) {
                    line = string(str.s);
                    return true;
                } else {
                    ++current_chrom;
                }
            }
            return false;
        }
    }
}

bool Tabix::getNextLineKS() {
    if (has_jumped) {
        if (iter && 
	    tbx_itr_next(fn, tbx, iter, &str) >= 0) {
            //line = &str;
            return true;
        } else 
	    return false;
    } else { // step through all sequences in the file
        // we've never jumped, so read everything
        if (iter && tbx_itr_next(fn, tbx, iter, &str) >= 0) {
            //line = &str;
            return true;
        } else {
            // While we aren't at the end, advance. While we're still not at the end...
            while (current_chrom != chroms.end() && ++current_chrom != chroms.end()) {
                tbx_itr_destroy(iter);
                iter = tbx_itr_querys(tbx, current_chrom->c_str());
                if (iter && tbx_itr_next(fn, tbx, iter, &str) >= 0) {
                    //line = &str;
                    return true;
                } else {
                    ++current_chrom;
                }
            }
            return false;
        }
    }
}