1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
|
#include "ReadsQualScores.h"
CReadsQualScores::CReadsQualScores(void)
{
initialization(0, 0);
}
CReadsQualScores::CReadsQualScores(unsigned int readLength, unsigned int numOfReads)
{
this->initialization(readLength, numOfReads);
}
CReadsQualScores::~CReadsQualScores(void)
{
delete [] this->QSarray;
this->QSarray = NULL;
}
void CReadsQualScores::clear(void)
{
this->load = 0;
}
void CReadsQualScores::reserve(unsigned int numOfReads)
{
if (numOfReads > this->numOfReads) {
this->numOfReads = numOfReads;
if (this->QSarray != NULL) {
delete [] this->QSarray; // all stored quality score will be gone
this->QSarray = NULL;
}
this->size = readLength * numOfReads;
this->QSarray = new char[this->size];
memset(this->QSarray, '\0', this->size);
}
}
void CReadsQualScores::initialization(unsigned int readLength, unsigned int numOfReads)
{
this->size = 0; // capacity in bases
this->QSarray = NULL;
this->scoreType = 'I';
this->load = 0; // how many reads' qualities score have been load
this->numOfReads = 0; // must initialize to 0 and reserve
this->readLength = readLength;
this->reserve(numOfReads);
}
/*
* This function open an QUAL file and get the first pReadsID->size quality scores
*/
bool CReadsQualScores::openQUALfile(const char* Filename)
{
if (fileExist(Filename)) {
if (hasTheExtName(Filename, ".qual") ||
hasTheExtName(Filename, ".QUAL")) {
const unsigned int FILE_INPUT_BUFFER_SIZE = 1000000;
this->scoreType = 'S';
this->ifile.open(Filename);
this->IBuf.initialize(FILE_INPUT_BUFFER_SIZE, &ifile);
return(true);
}
}
return(false);
}
unsigned int CReadsQualScores::getQualityScoresFromQUAL(vector<CReadID>* pReadsID)
{
char caBuffer[MAX_LINE];
char caReadTag[MAX_LINE];
caReadTag[0] = '\0';
while (this->load < (unsigned int)pReadsID->size()) {
unsigned int qsIndex = this->load * this->readLength;
if (this->load >= numOfReads) {
const char* theReadId = pReadsID->at(this->load).id;
LOG_INFO("Info %d: Read %s has no quality.\n", WARNING_LOG, theReadId);
break;
}
caBuffer[0] = '\0';
if (this->IBuf.Getline(caBuffer, MAX_LINE) == 0) {
break; // Note this->pBuf->Getline() will return 0 if EOF
} else {
//If this line is header, new line, comment or null line however not EOF, read the next line
if (caBuffer[0] == '>' && ifile.eof() == false) {
myStrCpy(caReadTag, &(caBuffer[1]), READ_ID_LENGTH - 2);
formatReadId(caReadTag);
} else if ((caBuffer[0] == ' ' || caBuffer[0] == '\n' ||
caBuffer[0] == '#' || caBuffer[0] == '\0') && ifile.eof() == false) {
continue;
} else { // No warning for the case that the number of quality score and the read length are not matched.
// check the tag to see if it is the correct place
int iReadId = alignReadId4QScores(caReadTag, this->load, pReadsID);
if (iReadId < 0) {
continue; // bad read, skip its quality
} else {
this->load = (unsigned int)iReadId;
}
int bufferIndex = 0;
for (unsigned int pos = 0; pos < this->readLength; pos++) { //read the quality score for each base of the read
this->QSarray[qsIndex + pos] = (char)atoi(&caBuffer[bufferIndex]);
for (; caBuffer[bufferIndex] != ' '; bufferIndex++) {
if (caBuffer[bufferIndex] == '\0') break;
};
if (caBuffer[bufferIndex] == '\0') break;
bufferIndex++; // mov to the start of next qscore in the buffer
}
this->load ++; // get qs for one more reads
}
}
}
if (this->load < (unsigned int)pReadsID->size()) {
LOG_INFO("Info %d: Not every read has quality score.\n", WARNING_LOG);
for (unsigned int i = this->load; i < (unsigned int)pReadsID->size(); i++) {
cout << "Ex:" << pReadsID->at(i).id << " doesn't get QS" << endl;
break; // show only one
}
LOG_INFO("Info %d: Try arg --delimiter <Symbol> to get the correct read Id.\n", INFO_LOG);
}
return(this->load);
}
int CReadsQualScores::alignReadId4QScores(char* tag, unsigned int searchPoint, vector<CReadID>* pReadsID)
{
if (pReadsID == NULL) {
LOG_INFO("Info %d: No read Id is available.\n", WARNING_LOG);
} else if (searchPoint >= (pReadsID->size())) {
LOG_INFO("Info %d: Request quality scores over the read size %u.\n",\
WARNING_LOG, (unsigned int)pReadsID->size());
return(searchPoint);
} else { // Currently, only check the previous and the next tag if the current one is not matched.
char* expectedTag = pReadsID->at(searchPoint).id;
if (strcmp(expectedTag, tag) == 0) {
return(searchPoint); // read ID correctly aligned
}
// align to next tag
if (pReadsID->size() > searchPoint + 1) {
expectedTag = pReadsID->at(searchPoint + 1).id;
if (strcmp(expectedTag, tag) == 0) {
LOG_INFO("\nInfo %d: Read %s has no quality score.\n",\
WARNING_LOG, pReadsID->at(searchPoint).id);
return(searchPoint + 1);
}
}
// aligned to previous tag
if (pReadsID->size() > 0 && searchPoint > 0) {
expectedTag = pReadsID->at(searchPoint - 1).id;
if (strcmp(expectedTag, tag) == 0) {
LOG_INFO("\nInfo %d: Extra quality scores for read %d.\n",\
WARNING_LOG, searchPoint - 1);
return(searchPoint - 1);
}
}
// cout << "Bad read " << tag << endl;
// STRIKE_KEY2CONTINUE;
}
return(-1); // bad read with N, should skip its quality.
}
void trQScores(unsigned int readLength, char qShift, const char* oldQSs, char* newQSs)
{
unsigned int i;
for (i = 0; i < readLength; i++) {
newQSs[i] = oldQSs[i] + qShift;
}
newQSs[i] = '\0';
}
double getAverageQualityScores(CReadsQualScores& scores)
{
double sumOfQulityScore = 0;
for (unsigned int i = 0; i < scores.load; i++) {
for (unsigned int j = 0; j < scores.readLength; j++) {
sumOfQulityScore += (double)scores.qs(i, j);
}
}
return(sumOfQulityScore / (double)(scores.readLength * scores.load));
}
int alignmentScore(char* str1, char* str2, unsigned int readLength, const char* sc)
{
int score = 0;
for (unsigned int i = 0; i < readLength; i++) {
// sum of mismatched scores
if (str1[i] != str2[i]) {
int baseQuality = (int)sc[i];
if ( baseQuality > 0) {
score += baseQuality;
}
}
}
return(score);
}
void printCommaSepScoresStr(unsigned int readlength, const char* qScores, char* qScoresStr)
{
char scoreStr[MAX_LINE];
int scoreStrLength = 0;
unsigned int i = 0, j = 0;
for (i = 0, j = 0; i < readlength; i++, j+=scoreStrLength) {
sprintf(scoreStr, "%d,", (int)qScores[i]);
scoreStrLength = (int)strlen(scoreStr);
sprintf(&(qScoresStr[j]), "%s", scoreStr);
}
if (j > 0) j--; // removed the last ','
qScoresStr[j] = '\0';
}
|