File: BlasrFASTQReader.cpp

package info (click to toggle)
pbseqlib 5.3.4%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 7,020 kB
  • sloc: cpp: 77,246; python: 331; sh: 103; makefile: 42
file content (99 lines) | stat: -rw-r--r-- 2,590 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#include <pbdata/FASTQReader.hpp>

#include <climits>
#include <cmath>
#include <cstdio>

FASTQReader::FASTQReader() : FASTAReader() { endOfReadDelim = '\n'; }

int FASTQReader::GetNext(FASTASequence &seq) { return ((FASTAReader *)this)->GetNext(seq); }

unsigned char FASTQReader::phredQVtoPacbioQV(unsigned char phredQV)
{
    int qual = floor(100.0 * log10(pow(10.0, phredQV / 10.0) - 1.0) + 0.5);
    qual = qual > 250 ? 250 : qual;
    qual = qual < 1 ? 1 : qual;
    return (unsigned char)qual;
}

int FASTQReader::GetNext(FASTQSequence &seq)
{
    seq.Free();  // Free seq before being reused.
    char c;
    while (curPos < fileSize and
           ((c = filePtr[curPos]) == ' ' or c == '\t' or c == '\n' or c == '\r')) {
        curPos++;
    }

    if (curPos >= fileSize) {
        return false;
    }
    GenomeLength p = curPos;
    AdvanceToTitleStart(p, '@');
    CheckValidTitleStart(p, '@');
    ReadTitle(p, seq);
    // Title ends on '\n', consume that;
    p++;
    GenomeLength p2;
    p2 = p;
    while (p2 < fileSize and filePtr[p2] != '\n') {
        p2++;
    }
    if (p2 - p > UINT_MAX) {
        std::cout
            << "ERROR! Reading sequences stored in more than 4Gbytes of space is not supported."
            << std::endl;
        std::exit(EXIT_FAILURE);
    }

    seq.length = p2 - p;
    GenomeLength seqPos;
    if (seq.length > 0) {
        seq.seq = ProtectedNew<Nucleotide>(seq.length);
        p2 = p;
        seqPos = 0;
        while (p2 < fileSize and filePtr[p2] != '\n') {
            seq.seq[seqPos] = filePtr[p2];
            p2++;
            seqPos++;
        }
    } else {
        seq.seq = 0;
    }
    p = p2;

    AdvanceToTitleStart(p, '+');
    CheckValidTitleStart(p, '+');
    while (p < fileSize and filePtr[p] != '\n') {
        p++;
    }
    p++;  // skip '\n'
    p2 = p;
    while (p2 < fileSize and filePtr[p2] != '\n') {
        p2++;
    }
    seq.length = p2 - p;
    if (seq.length > 0) {
        seq.qual.Allocate(seq.length);
        p2 = p;
        GenomeLength seqPos = 0;
        while (p2 < fileSize and filePtr[p2] != '\n') {
            seq.qual[seqPos] = filePtr[p2] - FASTQSequence::charToQuality;
            p2++;
            seqPos++;
        }
    } else {
        seq.qual.data = NULL;
    }
    curPos = p2;
    seq.deleteOnExit = true;
    return 1;
}

int FASTQReader::Advance(int nSteps)
{
    // An advance of a FASTQ file is simply twice the number of
    // advances of FASTA, since each nucleotide sequence has a quality
    // sequence.
    return ((FASTAReader *)this)->Advance(nSteps * 2);
}