1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295
|
#pragma once
#ifndef COLOR_SPACE_READ_H
#define COLOR_SPACE_READ_H
#include "ReadInBits.h"
#include "ShortReadUtil.h"
#include "stdafx.h"
/*
* This file provides functions that translate reads from base space to color space
* and vice versa, with the reads encoded in class CReadInBits.
*/
// Convert a read from color space to base space.
// The first base is encoded in the first bit of the returned CReadInBits.
CReadInBits colors2Bases(CReadInBits readInColors);
// Translate a color read in B0123 format (presumably a leading base character
// followed by color digits 0-3) into a CReadInBits stored in readInColors.
// NOTE(review): the meaning of the bool result is not visible in this header --
// presumably true on success; confirm against the definition.
bool encodeColors(const char* colorsStr, CReadInBits& readInColors);
// NOTE(review): variant of encodeColors; how it differs is not visible in this header.
bool encodeColorsNas3(const char* colorsStr, CReadInBits& readInColors);
// Translate a color read stored as CReadInBits into a C string in B0123 format.
// The decoded characters are written into the caller-supplied colorsStr buffer
// (decodeColorReadWithPrimer relies on this).
// NOTE(review): the returned pointer is presumably colorsStr -- confirm against the definition.
char* decodeColors(char* colorsStr, CReadInBits readInColors);
// NOTE(review): presumably the counterpart of decodeColors for reads without a leading base -- confirm.
char* decodePureColors(char* colorsStr, CReadInBits readInColors);
// Decode a long color read that is stored as two CReadInBits halves.
// NOTE(review): oddRead presumably flags an odd total read length (see longBases2Colors) -- confirm.
char* decodeLongColors(char* colorsStr, CReadInBits readInColors1stHalf, CReadInBits readInColors2ndHalf, bool oddRead);
// Decode a color read into caRead with a primer-style prefix.
// The colors are decoded starting at caRead[1]; the character decoded there is
// then copied to caRead[0] (mimicking the first base coming from the primer)
// and caRead[1] is overwritten with the color '0'.
// caRead must have room for one character more than decodeColors writes.
// Returns caRead.
inline char* decodeColorReadWithPrimer(char* caRead, CReadInBits readInColors)
{
    char* const decodedStart = caRead + 1;
    decodeColors(decodedStart, readInColors);
    // Promote the decoded leading character to the primer position.
    *caRead = *decodedStart;
    *decodedStart = '0';
    return caRead;
}
// Decode a color read into caRead, taking the reference colors into account.
// NOTE(review): declaration only -- the exact effect of `correct` and what is
// written to caQscore are defined in the implementation file; confirm there.
char* correctAndDecodeRead \
(CReadInBits readInColors, CReadInBits refInColors, bool correct, char* caRead, char* caQscore);
// Color transition table: color_transfer_matrix[x][y] is the color digit
// ('0'-'3') for a base with id x followed by a base with id y.
// Every entry equals the digit for (x XOR y), so the table is symmetric and
// its diagonal is all '0'.
static const char color_transfer_matrix[4][4] = {
    /* x = 0 */ { '0', '1', '2', '3' },
    /* x = 1 */ { '1', '0', '3', '2' },
    /* x = 2 */ { '2', '3', '0', '1' },
    /* x = 3 */ { '3', '2', '1', '0' }
};
// Translate a NUL-terminated read in base space into a color-space string.
//
// Output layout: readInColor[0] is the first base copied verbatim,
// readInColor[1] is always '0', and readInColor[i + 1] (i >= 1) is the color
// of the transition readInBase[i - 1] -> readInBase[i], looked up in
// color_transfer_matrix. The output is NUL-terminated when the input
// terminator is found within the first MAX_READ_LENGTH * 2 characters.
//
// An empty input yields an empty output string (only readInColor[0] is written).
//
// NOTE: if readInBase holds MAX_READ_LENGTH * 2 or more characters, the loop
// stops without writing a terminator; the caller must then terminate the
// buffer itself. readInColor needs room for MAX_READ_LENGTH * 2 + 1 characters.
// NOTE(review): assumes nt2Id returns an id in 0..3 for every character of the
// read -- confirm its behavior for characters such as 'N'.
inline void basesStr2ColorStr(const char* readInBase, char* readInColor)
{
    // Guard: without this, an empty read would make the loop below look at
    // readInBase[1], which is past the terminator.
    if (readInBase[0] == '\0') {
        readInColor[0] = '\0';
        return;
    }
    readInColor[0] = readInBase[0];
    readInColor[1] = '0';
    for (unsigned int i = 1; i < MAX_READ_LENGTH * 2; i++) {
        const char prevBase = readInBase[i - 1];
        const char curBase = readInBase[i];
        if (curBase == '\0') {
            readInColor[i + 1] = '\0';
            break;
        }
        readInColor[i + 1] = color_transfer_matrix[nt2Id(prevBase)][nt2Id(curBase)];
    }
}
// Convert a read from base space to color space, keeping the first base.
// Bit 0 of the result is the first base itself (nothing is shifted into it);
// every higher bit k is base bit k XOR base bit k-1, i.e. a color transition.
// The bit just above the last base carries no color information.
inline CReadInBits bases2Colors(CReadInBits readInBase)
{
    CReadInBits colors;
    colors.UpperBits = (readInBase.UpperBits << 1) ^ readInBase.UpperBits;
    colors.LowerBits = (readInBase.LowerBits << 1) ^ readInBase.LowerBits;
    return colors;
}
// Convert a read from base space to pure color signals (no leading base).
// Bit k of the result is base bit k XOR base bit k+1, i.e. the transition
// between base k and base k+1.
// The most significant used bit carries no color information.
inline CReadInBits bases2PureColors(CReadInBits readInBase)
{
    CReadInBits pureColors;
    pureColors.UpperBits = (readInBase.UpperBits >> 1) ^ readInBase.UpperBits;
    pureColors.LowerBits = (readInBase.LowerBits >> 1) ^ readInBase.LowerBits;
    return pureColors;
}
// Reverse the order of pure color signals (used for the reverse complement of a read).
// Each word is bit-reversed with reverse64bits and then shifted right by the
// number of unused bits (wordSize - colorsLength), so the reversed colors end
// up in the low colorsLength bits.
// NOTE(review): presumably requires 0 < colorsLength <= wordSize so the shift
// amount stays in range -- confirm with callers.
inline CReadInBits reversePureColors(CReadInBits readInPureColors, unsigned int colorsLength)
{
    CReadInBits reversed;
    const unsigned int unusedBits = (wordSize - colorsLength);
    reversed.UpperBits = reverse64bits(readInPureColors.UpperBits) >> unusedBits;
    reversed.LowerBits = reverse64bits(readInPureColors.LowerBits) >> unusedBits;
    return reversed;
}
// Convert a long read, stored as two CReadInBits halves, from base space to
// color space. The first base is kept in the first (least significant) bit of
// the first half.
//
// Each half is converted with bases2Colors. Bit 0 of the second half is then
// replaced by the transition between the last base of the first half and the
// first base of the second half, so the colors run continuously across the seam.
// The last base of the first half sits at bit iReadLength - 2 when
// oddReadLength is true and at bit iReadLength - 1 otherwise.
inline void longBases2Colors(CReadInBits& readInBase1stHalf, CReadInBits& readInBase2ndHalf, CReadInBits& readInColor1stHalf, CReadInBits& readInColor2ndHalf, bool oddReadLength)
{
    readInColor1stHalf = bases2Colors(readInBase1stHalf);
    readInColor2ndHalf = bases2Colors(readInBase2ndHalf);

    // Bit position of the last base stored in the first half.
    const unsigned int lastBasePos = (unsigned int)CReadInBits::iReadLength - (oddReadLength ? 0x02 : 0x01);

    // Last base of the first half and first base of the second half, one bit per word.
    const unsigned long long lastUpper = longlongShiftRight(readInBase1stHalf.UpperBits, lastBasePos) & 0x01;
    const unsigned long long lastLower = longlongShiftRight(readInBase1stHalf.LowerBits, lastBasePos) & 0x01;
    const unsigned long long firstUpper = readInBase2ndHalf.UpperBits & 0x01;
    const unsigned long long firstLower = readInBase2ndHalf.LowerBits & 0x01;

    // Color transition across the seam between the two halves.
    const unsigned long long seamUpper = (lastUpper ^ firstUpper) & 0x01;
    const unsigned long long seamLower = (lastLower ^ firstLower) & 0x01;

    // Clear bit 0 of the second half and store the seam transition there.
    readInColor2ndHalf.UpperBits = ((readInColor2ndHalf.UpperBits >> 0x01) << 0x01) | seamUpper;
    readInColor2ndHalf.LowerBits = ((readInColor2ndHalf.LowerBits >> 0x01) << 0x01) | seamLower;
}
// Correct single color mismatches in a read by merging it with the reference.
// SNP_FLAG is a bit mask: where a bit is set, the color is taken from
// readInColors (color mismatches caused by a SNP are kept); where it is clear,
// the color is taken from refInColors.
inline CReadInBits correctReadInColorSpace(CReadInBits readInColors, CReadInBits refInColors, WORD_SIZE SNP_FLAG)
{
    CReadInBits merged;
    merged.UpperBits = (refInColors.UpperBits & ~SNP_FLAG) | (SNP_FLAG & readInColors.UpperBits);
    merged.LowerBits = (refInColors.LowerBits & ~SNP_FLAG) | (SNP_FLAG & readInColors.LowerBits);
    return merged;
}
// Lookup table that translates a base and the color following it into the
// next base: base_transfer_matrix[b][c] is the base reached from base b
// (row order A, C, G, T) through color c (0-3).
const static char base_transfer_matrix[][4] = {
    { 'A', 'C', 'G', 'T' },
    { 'C', 'A', 'T', 'G' },
    { 'G', 'T', 'A', 'C' },
    { 'T', 'G', 'C', 'A' }
};

// Translate a base and a color into the next base.
//   base  - current base, 'A', 'C', 'G' or 'T' in either case
//   color - color digit, '0'..'3'
// Returns the next base as an upper-case character, or 0 when base is not one
// of ACGT or color is not a digit in '0'..'3'.
inline char base2Color(char base, char color)
{
    // Reject colors outside '0'..'3' up front: they would otherwise index
    // outside the 4-column table.
    if (color < '0' || color > '3') {
        return(0);
    }
    const int colorId = color - '0';
    switch (base) {
    case 'A':
    case 'a':
        return (base_transfer_matrix[0][colorId]);
    case 'C':
    case 'c':
        return (base_transfer_matrix[1][colorId]);
    case 'G':
    case 'g':
        return (base_transfer_matrix[2][colorId]);
    case 'T':
    case 't':
        return (base_transfer_matrix[3][colorId]);
    default:
        return(0);
    }
}
// If there are two consecutive mismatched colors, check whether they form a valid SNP.
// Returns the number of mismatches, or a negative code when the mismatches are a valid SNP:
//   -1 = Complement, -2 = Transversion, -3 = Transition.
int getSNPtype(CReadInBits readInColors, CReadInBits refInColors);
// Classify the difference between a read and the reference as a flag character.
// The negative SNP codes from getSNPtype map to 'C' (-1), 'V' (-2) and 'T' (-3);
// every other result (a plain mismatch count) maps to 'N'.
inline char returnSNPtype(CReadInBits readInColors, CReadInBits refInColors)
{
    const int snpCode = getSNPtype(readInColors, refInColors);
    if (snpCode == -1) {
        return 'C';
    }
    if (snpCode == -2) {
        return 'V';
    }
    if (snpCode == -3) {
        return 'T';
    }
    return 'N';
}
// Translate an SNP flag character back into a positive type code:
// 'C' -> 1, 'V' -> 2, 'T' -> 3; any other character -> 0.
inline int returnSNPtype(char c)
{
    if (c == 'C') {
        return 1;
    }
    if (c == 'V') {
        return 2;
    }
    if (c == 'T') {
        return 3;
    }
    return 0;
}
// Correct the single color mismatches in the read against the reference and
// store the result in correctedRead.
// NOTE(review): the meaning of the int result is not visible in this header --
// presumably it mirrors getSNPtype (mismatch count or negative SNP code); confirm.
int correctReadInColorSpace(CReadInBits readInColors, CReadInBits refInColors, CReadInBits& correctedRead);
// Convert color-space quality values in Qscores to base-space quality values.
// NOTE(review): declaration only -- presumably Qscores is rewritten in place; confirm.
void colorQV2baseQV(CReadInBits readInColors, CReadInBits& correctedRead, char* Qscores);
// Overload driven by a bit flag of single color errors for a read of readLength.
// NOTE(review): the meaning of the bool result is not visible in this header.
bool colorQV2baseQV(WORD_SIZE singleColorErrorflag, char* Qscores, unsigned int readLength);
// Store the first base of a read in bit 0 of readInBase.
//
// Encoding written to (UpperBits bit 0, LowerBits bit 0):
//   A / a / '0' -> (0, 0)     C / c / '1' -> (0, 1)
//   G / g / '2' -> (1, 0)     T / t / '3' -> (1, 1)
// All other bits of readInBase are left untouched.
//
// Returns true when c is one of the characters above and the bit was written.
// Returns false and leaves readInBase unchanged when c is 'N' or 'n' (silently)
// or any other character (after printing a diagnostic to cout).
inline bool setFirstBase(char c, CReadInBits& readInBase)
{
    switch (c) {
    case 'a':
    case 'A':
    case '0':
        readInBase.UpperBits &= (~0x01);
        readInBase.LowerBits &= (~0x01);
        return(true);
    case 'c':
    case 'C':
    case '1':
        readInBase.UpperBits &= (~0x01);
        readInBase.LowerBits |= 0x01;
        return(true);
    case 'g':
    case 'G':
    case '2':
        readInBase.UpperBits |= 0x01;
        readInBase.LowerBits &= (~0x01);
        return(true);
    case 't':
    case 'T':
        case '3':
        readInBase.UpperBits |= 0x01;
        readInBase.LowerBits |= 0x01;
        return(true);
    case 'n':
    case 'N':
        // An unknown first base keeps whatever bit is already stored.
        // Lower case is accepted here as it is for a/c/g/t.
        return(false);
    default:
        // isprint requires a value representable as unsigned char; passing a
        // negative plain char is undefined behavior.
        if (isprint(static_cast<unsigned char>(c))) {
            cout << "Unknown character " << c << " in the first base of read." << endl;
        } else {
            cout << "Unknown character with ascii" << (int)c << " in the first base of read." << endl;
        }
        return(false);
    }
}
// Reverse a color read that carries its first base in bit 0.
// The color signals are reversed with reverseBitsSignals, leaving one extra
// bit free for the base bit. Bit 0 of the result is then overwritten with the
// last base of the original read (bit iReadLength - 1 of the decoded bases),
// because the reversal alone does not leave a correct first base there.
inline CReadInBits reverseColorRead(CReadInBits readInColors)
{
    // One more bit than the read length is kept so the end base bit fits.
    const int postReverseShift = wordSize - CReadInBits::iReadLength - 1;
    CReadInBits reversed = reverseBitsSignals(readInColors, postReverseShift);

    // The last base of the original read becomes the first base of the reversed read.
    CReadInBits decodedBases = colors2Bases(readInColors);
    bool lastBaseLowerBit = isKthBitSet(decodedBases.LowerBits, CReadInBits::iReadLength - 1);
    bool lastBaseUpperBit = isKthBitSet(decodedBases.UpperBits, CReadInBits::iReadLength - 1);
    setKthBit(reversed.LowerBits, 0, lastBaseLowerBit);
    setKthBit(reversed.UpperBits, 0, lastBaseUpperBit);
    return reversed;
}
// Intended to reverse a long color read stored as two CReadInBits halves.
// NOTE: not implemented. The body contains only a commented-out draft, so
// calling this function leaves both halves unchanged and ignores oddReadLength.
inline void reverseLongColorRead(CReadInBits& colorRead1stHalf, CReadInBits& colorRead2ndHalf, bool oddReadLength)
{
// Fix the first base is not correct after the reverse
/*
Unfinished draft, kept for reference. It does not compile as written: it
refers to readInColors, which is not a parameter here, and the odd-length
branch uses shiftBitsAfterReverse although only shiftBitsAfterReverse1 and
shiftBitsAfterReverse2 are declared there.
CReadInBits readInBases = colors2Bases(readInColors);
if(oddReadLength) {
const int shiftBitsAfterReverse1 = wordSize - CReadInBits::iReadLength;
CReadInBits revColorRead2 = reverseBitsSignals(colorRead1stHalf, shiftBitsAfterReverse);
const int shiftBitsAfterReverse2 = wordSize - CReadInBits::iReadLength - 1;
CReadInBits revColorRead1 = reverseBitsSignals(colorRead2ndHalf, shiftBitsAfterReverse);
} else {
const int shiftBitsAfterReverse = wordSize - CReadInBits::iReadLength - 1;
CReadInBits revColorRead2 = reverseBitsSignals(colorRead1stHalf, shiftBitsAfterReverse);
CReadInBits revColorRead1 = reverseBitsSignals(colorRead2ndHalf, shiftBitsAfterReverse);
}
*/
}
// Second declaration of decodeLongColors; it is also declared near the top of
// this header, where the last parameter is named oddRead.
char* decodeLongColors(char* colorsStr, CReadInBits readInColors1stHalf, CReadInBits readInColors2ndHalf, bool oddReadLength);
// Test helpers; only declared in this header.
void testShift64Bit(void);
void testLongBases2ColorsCases(void);
void testLongBases2Colors(const char* longRead, const char* expLongColorSignals);
void testReverseColorSignals(const char* colorSignalStr);
// NOTE(review): presumably asserts that getSNPtype(crInColors, refInColors) yields SNPType -- confirm.
void assertSNP(int SNPType, CReadInBits refInColors, CReadInBits crInColors);
// Given a string of bases, return the corresponding color signals written in
// ACGT format, where A=0, C=1, G=2 and T=3.
string readInBases2ColorsInACGT_Format(string readInBases);
// Given a color read in ACGT format, return the corresponding string in 0123
// format (A=0, C=1, G=2, T=3).
// Note the first base is duplicated.
string colorReadInACGTto0123Format(string colorReadInACGT);
#endif /* COLOR_SPACE_READ_H */
|