File: tabix_vcf.cpp

package info (click to toggle)
seqan2 2.5.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 228,748 kB
  • sloc: cpp: 257,602; ansic: 91,967; python: 8,326; sh: 1,056; xml: 570; makefile: 229; awk: 51; javascript: 21
file content (65 lines) | stat: -rw-r--r-- 1,774 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#include <seqan/basic.h>
#include <seqan/sequence.h>
#include <seqan/vcf_io.h>
#include <seqan/tabix_io.h>

using namespace seqan2;

int main(int argc, char const * argv[])
{
    if (argc < 5)
    {
        std::cerr << "USAGE: " << argv[0] << " <variants.vcf.gz> <contig> <begin> <end>\n";
        return 0;
    }

    // Open VCF file
    VcfFileIn vcfFile;
    if (!open(vcfFile, argv[1]))
    {
        std::cerr << "ERROR: Could not open " << argv[1] << " for reading.\n";
        return 1;
    }

    // Read header (to get the contig names)
    seqan2::VcfHeader header;
    readHeader(header, vcfFile);

    // Open Tabix index
    std::string tbiFileName = (std::string)argv[1] + ".tbi";
    TabixIndex tabixIndex;
    if (!open(tabixIndex, tbiFileName.c_str()))
    {
        std::cerr << "ERROR: Could not read Tabix index file " << tbiFileName << "\n";
        return 1;
    }

    // Search overlapping variants
    bool hasEntries = false;
    int begPos = lexicalCast<int>(argv[3]);
    int endPos = lexicalCast<int>(argv[4]);
    if (!jumpToRegion(vcfFile,
                      hasEntries,
                      argv[2],
                      begPos,
                      endPos,
                      tabixIndex))
    {
        std::cerr << "Contig " << argv[2] << " not found!\n";
        return 1;
    }

    seqan2::VcfRecord record;
    while (hasEntries && !atEnd(vcfFile))
    {
        readRecord(record, vcfFile);

        // If we are on the next reference or at the end already then we stop.
        if (record.rID == -1 || contigNames(context(vcfFile))[record.rID] != argv[2] || record.beginPos >= endPos)
            break;

        std::cout << record.beginPos << '\t' << record.ref << '\t' << record.alt << std::endl;
    }

    return 0;
}