File: tabix.cpp

package info (click to toggle)
libtabixpp 1.0.0-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 372 kB
  • ctags: 41
  • sloc: cpp: 163; makefile: 41; sh: 16
file content (110 lines) | stat: -rw-r--r-- 3,116 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#include "tabix.hpp"

Tabix::Tabix(void) { }

Tabix::Tabix(string& file) {
    has_jumped = false;
    filename = file;
    const char* cfilename = file.c_str();
    struct stat stat_tbi,stat_vcf;
    char *fnidx = (char*) calloc(strlen(cfilename) + 5, 1);
    strcat(strcpy(fnidx, cfilename), ".tbi");
    if ( bgzf_is_bgzf(cfilename)!=1 )
    {
        cerr << "[tabix++] was bgzip used to compress this file? " << file << endl;
        free(fnidx);
        exit(1);
    }
    // Common source of errors: new VCF is used with an old index
    stat(fnidx, &stat_tbi);
    stat(cfilename, &stat_vcf);
    if ( stat_vcf.st_mtime > stat_tbi.st_mtime )
    {
        cerr << "[tabix++] the index file is older than the vcf file. Please use '-f' to overwrite or reindex." << endl;
        free(fnidx);
        exit(1);
    }
    free(fnidx);

    if ((fn = hts_open(cfilename, "r")) == 0) {
        cerr << "[tabix++] fail to open the data file." << endl;
        exit(1);
    }

    if ((tbx = tbx_index_load(cfilename)) == NULL) {
        cerr << "[tabix++] failed to load the index file." << endl;
        exit(1);
    }

    int nseq;
    const char** seq = tbx_seqnames(tbx, &nseq);
    for (int i=0; i<nseq; i++) {
        chroms.push_back(seq[i]);
    }
    free(seq);

    idxconf = &tbx_conf_vcf;

    // set up the iterator, defaults to the beginning
    current_chrom = chroms.begin();
    iter = tbx_itr_querys(tbx, current_chrom->c_str());

}

Tabix::~Tabix(void) {
    tbx_itr_destroy(iter);
    tbx_destroy(tbx);
}

void Tabix::getHeader(string& header) {
    header.clear();
    kstring_t str = {0,0,0};
    while ( hts_getline(fn, KS_SEP_LINE, &str) >= 0 ) {
        if ( !str.l || str.s[0]!=tbx->conf.meta_char ) {
            break;
        } else {
            header += string(str.s);
            header += "\n";
        }
    }
    // set back to start
    current_chrom = chroms.begin();
    if (iter) tbx_itr_destroy(iter);
    iter = tbx_itr_querys(tbx, current_chrom->c_str());
}

bool Tabix::setRegion(string& region) {
    tbx_itr_destroy(iter);
    iter = tbx_itr_querys(tbx, region.c_str());
    has_jumped = true;
    return true;
}

bool Tabix::getNextLine(string& line) {
    kstring_t str = {0,0,0};
    if (has_jumped) {
        if (iter && tbx_itr_next(fn, tbx, iter, &str) >= 0) {
            line = string(str.s);
            return true;
        } else return false;
    } else { // step through all sequences in the file
        // we've never jumped, so read everything
        if (iter && tbx_itr_next(fn, tbx, iter, &str) >= 0) {
            line = string(str.s);
            return true;
        } else {
            ++current_chrom;
            while (current_chrom != chroms.end()) {
                tbx_itr_destroy(iter);
                iter = tbx_itr_querys(tbx, current_chrom->c_str());
                if (iter && tbx_itr_next(fn, tbx, iter, &str) >= 0) {
                    line = string(str.s);
                    return true;
                } else {
                    ++current_chrom;
                }
            }
            return false;
        }
    }
}