1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
|
#include "LongReadsSet.h"
// Default constructor. It executes no statements of its own, so the object
// is set up only by whatever the base class and member defaults provide.
CLongReadsSet::CLongReadsSet()
{
}
// Opens InputFile for reading long reads by delegating to the parent
// constructor, with the parent's bool in5to3cat3to5Format argument fixed to
// false, and then records the expected read-string line length as the
// long-read length of this set.
CLongReadsSet::CLongReadsSet(const char* InputFile,
                             const char* fileFormat,
                             unsigned int expReadStrLineLength,
                             unsigned int allowedNumOfNinRead,
                             unsigned int readStartIndex)
    : CPairedReadsSet(InputFile,
                      fileFormat,
                      expReadStrLineLength,
                      /* in5to3cat3to5Format = */ false,
                      allowedNumOfNinRead,
                      readStartIndex)
{
    this->longReadLength = expReadStrLineLength;
}
// Destructor. Nothing is released here; any cleanup is left to the base
// class and member destructors.
CLongReadsSet::~CLongReadsSet()
{
}
// Returns the number of reads currently held, taken as the smaller of the
// reverse and forward read-set sizes. The two halves are expected to hold
// the same number of entries.
int CLongReadsSet::size()
{
    const int reverseCount = this->R_Reads->pReadsSet->size();
    const int forwardCount = this->F_Reads->pReadsSet->size();
    return (forwardCount < reverseCount) ? forwardCount : reverseCount;
}
// Points the parser's output buffer (pOBuf) at pOut.
// NOTE(review): presumably this is where rejected reads get written - confirm
// against the parser's print_Next_Read().
void CLongReadsSet::setBadReadOutputFile(FileOutputBuffer* pOut)
{
    parser.pOBuf = pOut;
}
// Clears the current reads and loads the next batch of long reads from the
// parser.
// Reading stops at end of input, or once the forward read-ID list is no
// longer smaller than the capacity of the forward read set. Reads rejected by
// isBadRead() are printed through the parser and passed to handleBadRead();
// every other read is split and stored by save_next_long_read().
// Returns the number of reads in the reverse read set after the batch.
unsigned int CLongReadsSet::get_next_capacity_long_reads()
{
    // Quality scores are kept only when both halves have a score container.
    const bool storeQS = (this->R_Reads->pQualScores != NULL) && (this->F_Reads->pQualScores != NULL);
    // File types 'Q' and 'S' select SOLiD handling; 'Q' and 'q' carry scores.
    const bool isSOLiD = (this->cFileType == 'Q' || this->cFileType == 'S');
    const bool wantQScores = (this->cFileType == 'Q' || this->cFileType == 'q') && storeQS;

    this->clearReads();

    for (;;) {
        // The parser keeps the fetched read in its own caNextRead buffer.
        const char* nextRead = parser.get_Next_Read();

        if (nextRead[0] == '\0') {
            // An empty read marks the end of the file.
            this->parser.pBuf->fflush();
            break;
        }

        if (isBadRead(isSOLiD, nextRead, this->longReadLength)) {
            this->parser.print_Next_Read();
            this->handleBadRead();
        } else {
            this->save_next_long_read(isSOLiD, wantQScores, this->in5to3cat3to5Format);
        }

        const bool roomLeft = this->F_Reads->pReadsID->size() < this->F_Reads->pReadsSet->capacity();
        if (!roomLeft) {
            break;
        }
    }

    printf("Deal read no. %u in %s.\r", this->uiNo_of_Reads, this->InputFile);
    this->removeExtraTags();
    if (storeQS) {
        this->getQualityScoresFromQUAL();
    }
    return (unsigned int)this->R_Reads->pReadsSet->size();
}
// The private function store next read in the parser object
// For reads longer than 64 and shorter than 128, reads are store as two parts in two CReadInBits
// For odd read length, the two parts are overlapped with one base.
bool CLongReadsSet::save_next_long_read(bool bSOLiDReadFormat, bool getQScores,\
bool in5to3cat3to5Format)
{
// bool bDiscardReadWithN = this->F_Reads->bDiscardReadWithN && this->R_Reads->bDiscardReadWithN;
char* readSeq = this->parser.caNextRead;
unsigned int fullReadLength = (unsigned int)strlen(readSeq);
unsigned int expFullReadLength = getExpReadLength(fullReadLength);
bool returnV;
if(isBadRead(bSOLiDReadFormat, this->parser.caNextRead, expFullReadLength)) {
return(false);
} else {
if(bSOLiDReadFormat) {
returnV = save_next_long_SOLiD_read(fullReadLength, getQScores);
} else {
returnV = save_next_long_Illumina_read(fullReadLength, getQScores, in5to3cat3to5Format);
}
}
this->save_next_read_id(this->parser.caNextReadTag);
this->uiNo_of_Reads++;
return(returnV);
}
// Splits the Illumina read held by the parser into two parts of uiRead_Length
// characters and stores them: the trailing part in R_Reads and the leading
// part in F_Reads. When in5to3cat3to5Format is set, the trailing part (and
// its quality string, if getQScores is set) is first passed through
// reverseKmer(). When getQScores is set, the quality string is split and
// stored the same way. The parser's buffers are truncated in place.
// Always returns true.
// NOTE(review): fullReadLength is assumed to be >= uiRead_Length; otherwise
// the unsigned subtraction below wraps around - confirm callers guarantee it.
// NOTE(review): when the two parts overlap, the in-place reversal also
// changes the overlapping tail of the leading part - confirm this is intended.
bool CLongReadsSet::save_next_long_Illumina_read(unsigned int fullReadLength, bool getQScores, bool in5to3cat3to5Format)
{
    const bool kIsSOLiD = false;
    const unsigned int partLength = this->uiRead_Length;
    const unsigned int tailOffset = fullReadLength - partLength;

    char* const sequence = this->parser.caNextRead;
    char* const qualities = this->parser.caNextReadQSs;

    if (in5to3cat3to5Format) {
        reverseKmer(&sequence[tailOffset]);
        if (getQScores) {
            reverseKmer(&qualities[tailOffset]);
        }
    }

    // The trailing part must be stored before the buffer is cut short.
    const char* tailSequence = &sequence[tailOffset];
    this->R_Reads->save_next_read(tailSequence, kIsSOLiD);

    // Terminate the buffer after the leading part, then store that part.
    this->parser.caNextRead[partLength] = '\0';
    this->F_Reads->save_next_read(sequence, kIsSOLiD);

    if (getQScores) {
        const char* tailQualities = &qualities[tailOffset];
        this->R_Reads->pQualScores->addQSs(tailQualities);
        qualities[partLength] = '\0';
        this->F_Reads->pQualScores->addQSs(qualities);
    }
    return true;
}
// Stores the SOLiD read held by the parser: the first base and the following
// color signals are saved as two parts of uiRead_Length characters, the
// trailing part in R_Reads and the leading part in F_Reads. When getQScores
// is set, the quality string is split and stored the same way. The parser's
// buffers are truncated in place.
// Returns true once both parts have been stored, consistent with
// save_next_long_Illumina_read(); save_next_long_read() reserves false for a
// read that was rejected and not stored.
// NOTE(review): fullReadLength is assumed to be >= uiRead_Length; otherwise
// the unsigned subtraction below wraps around - confirm callers guarantee it.
bool CLongReadsSet::save_next_long_SOLiD_read(unsigned int fullReadLength, bool getQScores)
{
    const bool bSOLiDReadFormat = true;
    char* readSeq = this->parser.caNextRead;
    char* readQS = this->parser.caNextReadQSs;
    const unsigned int eachPartLength = this->uiRead_Length;
    const unsigned int secondPartStart = fullReadLength - eachPartLength;

    // The trailing part must be stored before the buffer is cut short.
    const char* rReadSeq = &readSeq[secondPartStart];
    this->R_Reads->save_next_read(rReadSeq, bSOLiDReadFormat);

    // Terminate the buffer after the leading part, then store that part.
    this->parser.caNextRead[eachPartLength] = '\0';
    this->F_Reads->save_next_read(readSeq, bSOLiDReadFormat);

    if (getQScores) {
        const char* rReadQS = &readQS[secondPartStart];
        this->R_Reads->pQualScores->addQSs(rReadQS);
        readQS[eachPartLength] = '\0';
        this->F_Reads->pQualScores->addQSs(readQS);
    }
    // Both halves were stored, so report success.
    return true;
}
// Clears both sets and loads the next batch of long reads into set1 and set2
// in lock step, one read from each parser per iteration.
// Reading stops when either parser yields an empty read (end of file), or
// when the forward read-ID list of either set is no longer smaller than the
// capacity of its forward read set. If either read of a pair is rejected by
// isBadRead(), handleBadRead() is called on both sets; otherwise both reads
// are stored with in5to3cat3to5Format = false.
// File-type flags and the quality-score decision are taken from set1 only.
// Returns the smaller of set1.size() and set2.size().
// NOTE(review): unlike CLongReadsSet::get_next_capacity_long_reads(), bad
// reads are not printed through the parser here - confirm this is intended.
int get_next_capacity_long_paired_reads(CLongReadsSet &set1, CLongReadsSet &set2)
{
    const bool storeQS = (set1.R_Reads->pQualScores != NULL) && (set1.F_Reads->pQualScores != NULL);
    const bool wantQScores = (set1.cFileType == 'Q' || set1.cFileType == 'q') && storeQS;
    const bool isSOLiD = (set1.cFileType == 'Q' || set1.cFileType == 'S');
    const bool kConcatenatedFormat = false;

    set1.clearReads();
    set2.clearReads();

    for (;;) {
        const char* read1 = set1.parser.get_Next_Read();
        const char* read2 = set2.parser.get_Next_Read();

        if (read1[0] == '\0' || read2[0] == '\0') {
            // An empty read from either input marks the end of the file.
            set1.parser.pBuf->fflush();
            set2.parser.pBuf->fflush();
            break;
        }

        const bool pairIsBad = isBadRead(isSOLiD, read1, set1.longReadLength) ||
                               isBadRead(isSOLiD, read2, set2.longReadLength);
        if (pairIsBad) {
            // Reject both reads together so the two sets stay aligned.
            set1.handleBadRead();
            set2.handleBadRead();
        } else {
            set1.save_next_long_read(isSOLiD, wantQScores, kConcatenatedFormat);
            set2.save_next_long_read(isSOLiD, wantQScores, kConcatenatedFormat);
        }

        const bool roomLeft = set1.F_Reads->pReadsID->size() < set1.F_Reads->pReadsSet->capacity() &&
                              set2.F_Reads->pReadsID->size() < set2.F_Reads->pReadsSet->capacity();
        if (!roomLeft) {
            break;
        }
    }

    printf("Deal read no. %u in %s.\r", set1.uiNo_of_Reads, set1.InputFile);
    set1.removeExtraTags();
    set2.removeExtraTags();
    if (storeQS) {
        set1.getQualityScoresFromQUAL();
        set2.getQualityScoresFromQUAL();
    }

    const int count1 = set1.size();
    const int count2 = set2.size();
    return (unsigned int)((count2 < count1) ? count2 : count1);
}
|