File: FASTQReader.cpp

package info (click to toggle)
pbseqlib 0~20161219-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 5,924 kB
  • ctags: 5,123
  • sloc: cpp: 82,727; makefile: 305; python: 239; sh: 8
file content (89 lines) | stat: -rw-r--r-- 2,437 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#include <cstdio>
#include <climits>
#include <cmath>
#include "FASTQReader.hpp"

FASTQReader::FASTQReader() : FASTAReader() {
    endOfReadDelim = '\n';
}

int FASTQReader::GetNext(FASTASequence &seq) {
    return ((FASTAReader*)this)->GetNext(seq);
}

unsigned char FASTQReader::phredQVtoPacbioQV(unsigned char phredQV){
    int qual = floor(100.0 * log10(pow(10.0, phredQV/10.0) - 1.0) + 0.5); 
    qual = qual > 250 ? 250 : qual;
    qual = qual < 1   ? 1   : qual;
    return (unsigned char) qual;
}

int FASTQReader::GetNext(FASTQSequence &seq) {
    seq.Free(); // Free seq before being reused. 
    char c;
    while( curPos < fileSize and ( (c = filePtr[curPos]) == ' ' or c == '\t' or c == '\n' or c == '\r') ) {
        curPos++;
    }

    if (curPos >= fileSize) {
        return false;
    }
    GenomeLength p = curPos;
    AdvanceToTitleStart(p, '@');
    CheckValidTitleStart(p,'@');
    ReadTitle(p, seq);
    // Title ends on '\n', consume that;
    p++;
    GenomeLength p2;
    p2 = p;
    while(p2 < fileSize and filePtr[p2] != '\n') { p2++;}
    if (p2 - p > UINT_MAX) {
        cout << "ERROR! Reading sequences stored in more than 4Gbytes of space is not supported." << endl;
        exit(1);
    }

    seq.length = p2 - p;
    GenomeLength seqPos;
    if (seq.length > 0) {
        seq.seq = ProtectedNew<Nucleotide>(seq.length);
        p2 = p;
        seqPos = 0;
        while(p2 < fileSize and filePtr[p2] != '\n') { seq.seq[seqPos] = filePtr[p2]; p2++; seqPos++;}
    }
    else {
        seq.seq = 0;
    }
    p = p2;

    AdvanceToTitleStart(p,'+');
    CheckValidTitleStart(p,'+');
    while(p < fileSize and filePtr[p] != '\n') { p++;}
    p++; // skip '\n'
    p2 = p;
    while(p2 < fileSize and filePtr[p2] != '\n') { p2++;}
    seq.length = p2 - p;
    if (seq.length > 0) {
        seq.qual.Allocate(seq.length);
        p2 = p;
        GenomeLength seqPos = 0;
        while(p2 < fileSize and filePtr[p2] != '\n') { 
            seq.qual[seqPos] = filePtr[p2] - FASTQSequence::charToQuality;
            p2++; seqPos++;
        }
    }
    else {
        seq.qual.data = NULL;
    }
    curPos = p2;
    seq.deleteOnExit = true;
    return 1;
}



int FASTQReader::Advance(int nSteps) {
    // An advance of a FASTQ file is simply twice the number of
    // advances of FASTA, since each nucleotide sequence has a quality
    // sequence. 
    return ((FASTAReader*)this)->Advance(nSteps*2);
}