1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
|
#include "PairedReadsSet.h"
/**
 * Default constructor: creates an empty paired read set.
 * All counters are zeroed and all owned pointers are NULL (see initialization()).
 */
CPairedReadsSet::CPairedReadsSet()
{
    initialization();
}
/**
 * Construct a paired read set that reads both ends of every pair from ONE
 * input file, where a read line holds the two ends concatenated.
 *
 * @param InputFile             path of the read file; copied into this object
 *                              and into both the forward and reverse sets
 * @param fileFormat            format string forwarded to the parser
 * @param expReadStrLineLength  expected length of a full read line (both ends)
 * @param in5to3cat3to5Format   stored as-is in the member of the same name
 * @param allowedNumOfNinRead   number of 'N' allowed per read; 0 turns on
 *                              discarding of reads that contain N
 * @param readStartIndex        not referenced in this constructor body
 */
CPairedReadsSet::CPairedReadsSet(const char* InputFile, const char* fileFormat,
                                 unsigned int expReadStrLineLength, bool in5to3cat3to5Format,
                                 unsigned int allowedNumOfNinRead, unsigned int readStartIndex)
{
    this->initialization();

    // A zero allowance means any read containing N is to be discarded.
    const bool bDiscardReadsWN = (allowedNumOfNinRead == 0);

    // TODO rename the uiRead_Length variable to proper name
    // uiRead_Length is the length of each end, not the total read length:
    // half of the line length, rounded up when the line length is odd.
    this->uiRead_Length = (expReadStrLineLength + 1) / 2;
    this->in5to3cat3to5Format = in5to3cat3to5Format;

    const unsigned int uiCapacity = BUFFERED_READS_SIZE;

    // Forward-end buffer.
    this->F_Reads = new CReadInBitsSet(uiCapacity, this->uiRead_Length);
    this->F_Reads->bDiscardReadWithN = bDiscardReadsWN;
    this->F_Reads->allowedNumOfNinRead = allowedNumOfNinRead;

    // Reverse-end buffer, configured identically.
    this->R_Reads = new CReadInBitsSet(uiCapacity, this->uiRead_Length);
    this->R_Reads->bDiscardReadWithN = bDiscardReadsWN;
    this->R_Reads->allowedNumOfNinRead = allowedNumOfNinRead;

    // All three objects remember the same (single) input file name.
    myStrCpy(this->InputFile, InputFile, FILENAME_MAX);
    myStrCpy(this->F_Reads->InputFile, InputFile, FILENAME_MAX);
    // chExtName(this->F_Reads->InputFile, "_F");
    myStrCpy(this->R_Reads->InputFile, InputFile, FILENAME_MAX);
    // chExtName(this->R_Reads->InputFile, "_R");

    // The length will be changed for long read with odd bases
    // NOTE(review): assumes caNextRead holds more than expReadStrLineLength
    // characters; the index is not range-checked here - confirm buffer size.
    this->parser.caNextRead[expReadStrLineLength] = '\0';

    this->openAFileReady2GetRead(InputFile, fileFormat, expReadStrLineLength, bDiscardReadsWN);
    // Haven't seen csfasta has QUAL file in the 5'-3'3'-5' form
}
/**
 * Construct a paired read set with pre-allocated forward and reverse buffers
 * and no input file attached.
 *
 * @param Capacity      capacity passed to each CReadInBitsSet
 * @param uiReadLength  read length passed to each CReadInBitsSet; also
 *                      recorded as this object's per-end read length
 */
CPairedReadsSet::CPairedReadsSet(unsigned int Capacity, unsigned int uiReadLength)
{
    // Put every member into a defined state first. Without this the counters
    // and, critically, pQualScoresF / pQualScoresR stay indeterminate and the
    // destructor would call delete on garbage pointers.
    this->initialization();

    // Same relationship as in the file-based constructor: the read sets are
    // built with the per-end length stored in uiRead_Length.
    this->uiRead_Length = uiReadLength;

    this->F_Reads = new CReadInBitsSet(Capacity, uiReadLength);
    this->R_Reads = new CReadInBitsSet(Capacity, uiReadLength);
}
/**
 * Destructor: releases the forward/reverse read sets and the two
 * quality-score objects held directly by this class.
 * Deleting a NULL pointer is a no-op, so members that were never allocated
 * (and left NULL by initialization()) are safe here.
 */
CPairedReadsSet::~CPairedReadsSet()
{
    // Read buffers for the two ends.
    delete F_Reads;
    delete R_Reads;

    // Quality scores held at the pair level.
    delete pQualScoresF;
    delete pQualScoresR;
}
/**
 * Open the input file through the parser and get both read buffers ready.
 *
 * @param InputFile            path of the read file; also copied to this->InputFile
 * @param fileFormat           format string forwarded to the parser
 * @param uiExpReadsStrLength  expected length of a full read line
 * @param bDiscardReadsWN      forwarded to the parser
 * @return 1 when the parser reports file type 'N' or a SOLiD type ('S'/'Q'),
 *         otherwise BUFFERED_READS_SIZE.
 */
unsigned int CPairedReadsSet::openAFileReady2GetRead(const char* InputFile, const char* fileFormat,
                                                     unsigned int uiExpReadsStrLength, bool bDiscardReadsWN)
{
    // const unsigned int uiExpReadsStrLength = this->uiRead_Length * 2;
    // In this format, no base can be removed from the 5' end
    const unsigned int uiReadStartIndex = 0;

    myStrCpy(this->InputFile, InputFile, FILENAME_MAX);
    this->cFileType = this->parser.openAFileReady2GetRead(
        InputFile, fileFormat, uiReadStartIndex, uiExpReadsStrLength, bDiscardReadsWN);

    switch (this->cFileType) {
    case 'N':
        // NOTE(review): 'N' presumably means the file could not be opened or
        // recognised - confirm against the parser.
        return 1;

    case 'S':
    case 'Q': {
        // SOLiD formats are not supported for single-file paired reads.
        string msg1 = "Currently, we don't know any SOLiD pair-end reads in the format,";
        string msg2 = "that concatenate forward and backward trend together";
        LOG_INFO("\nInfo %d: %s\n%s\n", WARNING_LOG, msg1.c_str(), msg2.c_str());
        return 1;
    }

    case 'q':
        // Quality scores come with the reads: allocate per-end score storage.
        // NOTE(review): any previous pQualScores value is overwritten without
        // being freed - confirm this is only ever called once per object.
        this->F_Reads->pQualScores = new CReadsQualScores(this->uiRead_Length, BUFFERED_READS_SIZE);
        this->R_Reads->pQualScores = new CReadsQualScores(this->uiRead_Length, BUFFERED_READS_SIZE);
        break;

    default:
        break;
    }

    // For csfasta reads, new the quality score space when QUAL files exists
    this->F_Reads->clear(BUFFERED_READS_SIZE);
    this->R_Reads->clear(BUFFERED_READS_SIZE);
    return BUFFERED_READS_SIZE;
}
void CPairedReadsSet::clearReads(void)
{
this->F_Reads->clear(BUFFERED_READS_SIZE);
this->R_Reads->clear(BUFFERED_READS_SIZE);
}
void CPairedReadsSet::ignoreQScores(void)
{
this->F_Reads->ignoreQScores();
this->R_Reads->ignoreQScores();
}
/**
 * Drop trailing read IDs so that the forward ID list is no longer than
 * uiNo_of_Reads. The same number of IDs is popped from the reverse list.
 *
 * NOTE(review): the count is taken from the forward list only; this assumes
 * the reverse list holds at least as many IDs - confirm with callers.
 */
void CPairedReadsSet::removeExtraTags()
{
    unsigned int uiIdsLeft = (unsigned int)(this->F_Reads->pReadsID->size());
    while (uiIdsLeft > this->uiNo_of_Reads) {
        // remove extra tags from both ends in lock-step
        this->F_Reads->pReadsID->pop_back();
        this->R_Reads->pReadsID->pop_back();
        --uiIdsLeft;
    }
}
/**
 * For file type 'S', ask each end's quality-score object to load the scores
 * for that end's read IDs from the QUAL data.
 *
 * Does nothing when the file type is not 'S', or when either read set or its
 * quality-score storage has not been allocated. openAFileReady2GetRead() does
 * not allocate score storage for type 'S', so the pointers may be NULL here
 * and must not be dereferenced unconditionally.
 */
void CPairedReadsSet::getQualityScoresFromQUAL(void)
{
    if (this->cFileType != 'S') {
        return;
    }
    // Both ends must have score storage; loading only one end would leave the
    // pair inconsistent.
    const bool bHasStorage = (this->F_Reads != NULL) && (this->R_Reads != NULL)
                          && (this->F_Reads->pQualScores != NULL)
                          && (this->R_Reads->pQualScores != NULL);
    if (!bHasStorage) {
        return;
    }
    this->F_Reads->pQualScores->getQualityScoresFromQUAL(this->F_Reads->pReadsID);
    this->R_Reads->pQualScores->getQualityScoresFromQUAL(this->R_Reads->pReadsID);
}
/*
unsigned int CPairedReadsSet::get_next_capacity_reads_pairs_from_single_file()
{
bool bStoreQS = (this->R_Reads->pQualScores != NULL) && (this->F_Reads->pQualScores != NULL);
bool bSOLiDReadFormat = (this->cFileType == 'Q' || this->cFileType == 'S');
bool bGetQScores = (this->cFileType == 'Q' || this->cFileType == 'q') && bStoreQS;
this->clearReads();
do {
const char* caNextRead = parser.get_Next_Read(); // get next read and store in this->parser.caNextRead
if (caNextRead[0] == '\0') {
this->parser.pBuf->fflush();
break; // End of the file
} else if (isBadRead(bSOLiDReadFormat, caNextRead, this->longReadLength)) {
this->handleBadread();
} else {
this->save_next_long_read(bSOLiDReadFormat, bGetQScores, this->in5to3cat3to5Format);
}
} while (this->F_Reads->pReadsID->size() < this->F_Reads->pReadsSet->capacity());
printf("Deal read no. %u in %s.\r", this->uiNo_of_Reads, this->InputFile);
this->removeExtraTags();
if(bStoreQS) {
this->getQualityScoresFromQUAL();
}
return((unsigned int)this->R_Reads->pReadsSet->size());
}
*/
/**
 * Compute the expected full (both-ends) read length from the stored per-end
 * length.
 *
 * @param fullReadLength  observed full read length; only its parity is used
 * @return twice uiRead_Length, minus one when fullReadLength is odd
 */
unsigned int CPairedReadsSet::getExpReadLength(unsigned int fullReadLength)
{
    const unsigned int uiBothEnds = this->uiRead_Length * 2;
    const bool bOdd = (fullReadLength % 2 == 1);
    // For an odd-length line the two ends together cover one base fewer.
    return bOdd ? (uiBothEnds - 1) : uiBothEnds;
}
void CPairedReadsSet::save_next_read_id(const char* tagLine, char sep)
{
this->F_Reads->save_next_read_id(tagLine, sep);
this->R_Reads->save_next_read_id(tagLine, sep);
}
int CPairedReadsSet::initialization(void)
{
this->uiNo_of_Bad_Reads = 0;
this->uiNo_of_Reads = 0;
this->uiRead_Length = 0;
this->F_Reads = NULL;
this->R_Reads = NULL;
this->pQualScoresF = NULL;
this->pQualScoresR = NULL;
this->in5to3cat3to5Format = false;
return(0);
}
void CPairedReadsSet::handleBadRead(void)
{
// Currently no message for Bad read
if (this->R_Reads->pReadsID->size() > this->R_Reads->uiNo_of_Reads) {
this->R_Reads->pReadsID->pop_back();
}
if (this->F_Reads->pReadsID->size() > this->F_Reads->uiNo_of_Reads) {
this->F_Reads->pReadsID->pop_back();
}
this->uiNo_of_Bad_Reads ++; // double check if the counter is correct
}
|