File: LongReadsSet.cpp

package info (click to toggle)
perm 0.4.0-8
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 976 kB
  • sloc: cpp: 13,499; makefile: 98; sh: 12
file content (162 lines) | stat: -rw-r--r-- 6,849 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#include "LongReadsSet.h"
// Default constructor. The base-class default constructor runs implicitly and
// no input file is supplied here.
CLongReadsSet::CLongReadsSet(void)
{
    // Give the expected full-read length a defined value: get_next_capacity_long_reads()
    // hands it to isBadRead(), so it must never be read while indeterminate.
    // NOTE(review): assumes the header does not already give longReadLength a default — confirm.
    this->longReadLength = 0;
}

// Construct a long-reads set for the given input file.
//   InputFile, fileFormat  - forwarded unchanged to the CPairedReadsSet constructor
//   expReadStrLineLength   - expected length of a complete read line; also kept in longReadLength
//   allowedNumOfNinRead    - forwarded unchanged to the CPairedReadsSet constructor
//   readStartIndex         - forwarded unchanged to the CPairedReadsSet constructor
// The parent constructor is called with its fourth argument fixed to false
// (per the original note: in5to3cat3to5Format = false, and the parent opens the file for reading).
CLongReadsSet::CLongReadsSet(const char* InputFile, const char* fileFormat,
                             unsigned int expReadStrLineLength, unsigned int allowedNumOfNinRead,
                             unsigned int readStartIndex)
    : CPairedReadsSet(InputFile, fileFormat, expReadStrLineLength, false,
                      allowedNumOfNinRead, readStartIndex)
{
    // Remember the expected length of the whole (unsplit) read.
    longReadLength = expReadStrLineLength;
}

// Destructor: this class releases nothing itself; base-class destruction runs implicitly.
CLongReadsSet::~CLongReadsSet()
{
}

// Returns the number of stored reads, taken as the smaller of the entry counts
// of the R_Reads and F_Reads containers.
int CLongReadsSet::size()
{
    const int reverseCount = this->R_Reads->pReadsSet->size();
    const int forwardCount = this->F_Reads->pReadsSet->size();
    // The two halves are expected to hold the same number of entries;
    // the smaller count is reported in case they ever differ.
    return (forwardCount < reverseCount) ? forwardCount : reverseCount;
}

// Stores the given output buffer in the parser's pOBuf member.
// Judging by the method name this is the destination for rejected reads
// (presumably written by parser.print_Next_Read() — confirm against the parser).
void CLongReadsSet::setBadReadOutputFile(FileOutputBuffer* pOut)
{
    parser.pOBuf = pOut;
}

unsigned int CLongReadsSet::get_next_capacity_long_reads()
{
    bool bStoreQS = (this->R_Reads->pQualScores != NULL) && (this->F_Reads->pQualScores != NULL);
    bool bSOLiDReadFormat = (this->cFileType == 'Q' || this->cFileType == 'S');
    bool bGetQScores = (this->cFileType == 'Q' || this->cFileType == 'q') && bStoreQS;
    this->clearReads();
    do {
        const char* caNextRead = parser.get_Next_Read(); // get next read and store in this->parser.caNextRead
        if (caNextRead[0] == '\0') {
            this->parser.pBuf->fflush();
            break; // End of the file
        } else if (isBadRead(bSOLiDReadFormat, caNextRead, this->longReadLength)) {
            this->parser.print_Next_Read();
            this->handleBadRead();
        } else {
            this->save_next_long_read(bSOLiDReadFormat, bGetQScores, this->in5to3cat3to5Format);
        }
    } while (this->F_Reads->pReadsID->size() < this->F_Reads->pReadsSet->capacity());
    printf("Deal read no. %u in %s.\r", this->uiNo_of_Reads, this->InputFile);
    this->removeExtraTags();
    if(bStoreQS) {
        this->getQualityScoresFromQUAL();
    }
    return((unsigned int)this->R_Reads->pReadsSet->size());
}

// The private function store next read in the parser object
// For reads longer than 64 and shorter than 128, reads are store as two parts in two CReadInBits
// For odd read length, the two parts are overlapped with one base.
bool CLongReadsSet::save_next_long_read(bool bSOLiDReadFormat, bool getQScores,\
                                        bool in5to3cat3to5Format)
{
    // bool bDiscardReadWithN = this->F_Reads->bDiscardReadWithN && this->R_Reads->bDiscardReadWithN;
    char* readSeq = this->parser.caNextRead;
    unsigned int fullReadLength = (unsigned int)strlen(readSeq);
    unsigned int expFullReadLength = getExpReadLength(fullReadLength);
    bool returnV;
    if(isBadRead(bSOLiDReadFormat, this->parser.caNextRead, expFullReadLength)) {
        return(false);
    } else {
        if(bSOLiDReadFormat) {
            returnV = save_next_long_SOLiD_read(fullReadLength, getQScores);
        } else {
            returnV = save_next_long_Illumina_read(fullReadLength, getQScores, in5to3cat3to5Format);
        }
    }
    this->save_next_read_id(this->parser.caNextReadTag);
    this->uiNo_of_Reads++;
    return(returnV);
}

bool CLongReadsSet::save_next_long_Illumina_read(unsigned int fullReadLength, bool getQScores, bool in5to3cat3to5Format)
{
    const bool bSOLiDReadFormat = false;
    char* readSeq = this->parser.caNextRead;
    char* readQS  = this->parser.caNextReadQSs;
    unsigned int eachPartLength = this->uiRead_Length;
    unsigned int secondPartStart = fullReadLength - eachPartLength;
    if (in5to3cat3to5Format) {
        reverseKmer(&readSeq[secondPartStart]);
        if (getQScores) {
            reverseKmer(&readQS[secondPartStart]);
        }
    }
    const char* rReadSeq = &readSeq[secondPartStart];
    this->R_Reads->save_next_read(rReadSeq, bSOLiDReadFormat);
    this->parser.caNextRead[eachPartLength] = '\0';
    this->F_Reads->save_next_read(readSeq, bSOLiDReadFormat);
    if (getQScores) {
        const char* rReadQS = &readQS[secondPartStart];
        this->R_Reads->pQualScores->addQSs(rReadQS);
        readQS[eachPartLength] = '\0';
        this->F_Reads->pQualScores->addQSs(readQS);
    }
    return(true);
}

// The first base and the following color signals are saved into two parts:
// the last uiRead_Length characters of the parser's read go to R_Reads and the
// first uiRead_Length characters go to F_Reads.
//   fullReadLength - length of the read string held by the parser
//   getQScores     - also split and store the quality string the same way
// The parser buffers are modified in place (truncated at uiRead_Length).
// Returns true once both parts have been stored, matching save_next_long_Illumina_read().
// The previous `return(false)` reported every stored SOLiD read as a failure to
// save_next_long_read(), which forwards this value to its caller.
// NOTE(review): assumes fullReadLength >= uiRead_Length; otherwise the offset wraps — confirm callers guarantee it.
bool CLongReadsSet::save_next_long_SOLiD_read(unsigned int fullReadLength, bool getQScores)
{
    const bool bSOLiDReadFormat = true;
    char* readSeq = this->parser.caNextRead;
    char* readQS  = this->parser.caNextReadQSs;
    unsigned int eachPartLength = this->uiRead_Length;
    unsigned int secondPartStart = fullReadLength - eachPartLength;

    // Store the second part first: terminating the first part below may cut into it.
    const char* rReadSeq = &readSeq[secondPartStart];
    this->R_Reads->save_next_read(rReadSeq, bSOLiDReadFormat);
    this->parser.caNextRead[eachPartLength] = '\0';
    this->F_Reads->save_next_read(readSeq, bSOLiDReadFormat);

    if (getQScores) {
        // Same second-then-first order for the quality string.
        const char* rReadQS = &readQS[secondPartStart];
        this->R_Reads->pQualScores->addQSs(rReadQS);
        readQS[eachPartLength] = '\0';
        this->F_Reads->pQualScores->addQSs(readQS);
    }
    return(true);
}

// Reads the next batch of long reads from two sets in lock step, one read from each
// set per iteration, so that entry i of set1 and entry i of set2 come from the same
// position in their inputs.
// A pair is stored only when both reads pass isBadRead(); if either is bad, both sets
// call handleBadRead(). The batch ends when either parser returns an empty read or
// when either set's stored ID count is no longer below its forward container capacity.
// The file type of set1 decides the read format and quality handling for both sets.
// Returns the smaller of set1.size() and set2.size().
int get_next_capacity_long_paired_reads(CLongReadsSet &set1, CLongReadsSet &set2)
{
    // Quality scores are kept for a set only when both of its halves own a score container.
    // set2 needs its own check: applying set1's answer to it would dereference a null
    // pQualScores whenever only set1 has score containers.
    bool bStoreQS1 = (set1.R_Reads->pQualScores != NULL) && (set1.F_Reads->pQualScores != NULL);
    bool bStoreQS2 = bStoreQS1 &&
                     (set2.R_Reads->pQualScores != NULL) && (set2.F_Reads->pQualScores != NULL);
    bool bQualityFileType = (set1.cFileType == 'Q' || set1.cFileType == 'q');
    bool bGetQScores1 = bQualityFileType && bStoreQS1;
    bool bGetQScores2 = bQualityFileType && bStoreQS2;
    bool bSOLiDReadFormat = (set1.cFileType == 'Q' || set1.cFileType == 'S');
    set1.clearReads();
    set2.clearReads();
    do {
        const char* caNextRead1 = set1.parser.get_Next_Read();
        const char* caNextRead2 = set2.parser.get_Next_Read();
        if (caNextRead1[0] == '\0' || caNextRead2[0] == '\0') {
            set1.parser.pBuf->fflush();
            set2.parser.pBuf->fflush();
            break; // End of the file
        } else if (isBadRead(bSOLiDReadFormat, caNextRead1, set1.longReadLength) ||
                   isBadRead(bSOLiDReadFormat, caNextRead2, set2.longReadLength)) {
            // NOTE(review): unlike CLongReadsSet::get_next_capacity_long_reads(), the rejected
            // reads are not passed to parser.print_Next_Read() here — confirm this is intended.
            set1.handleBadRead();
            set2.handleBadRead();
        } else {
            bool in5to3cat3to5Format = false;
            set1.save_next_long_read(bSOLiDReadFormat, bGetQScores1, in5to3cat3to5Format);
            set2.save_next_long_read(bSOLiDReadFormat, bGetQScores2, in5to3cat3to5Format);
        }
    } while (set1.F_Reads->pReadsID->size() < set1.F_Reads->pReadsSet->capacity() &&
             set2.F_Reads->pReadsID->size() < set2.F_Reads->pReadsSet->capacity());
    printf("Deal read no. %u in %s.\r", set1.uiNo_of_Reads, set1.InputFile);
    set1.removeExtraTags();
    set2.removeExtraTags();
    if(bStoreQS1) {
        set1.getQualityScoresFromQUAL();
    }
    if(bStoreQS2) {
        set2.getQualityScoresFromQUAL();
    }
    // size() already returns int, matching this function's return type; no cast needed.
    return(min(set1.size(), set2.size()));
}