1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
|
#pragma once
/*
 * This class encodes a DNA sequence string into two unsigned words.
 * On a 32-bit machine, a read can hold at most 32 bases.
 * On a 64-bit machine, a read can hold at most 64 bases.
 */
#include "bitsOperationUtil.h"
#include <iostream>
using namespace std;
/*
 * A read stored as two bit-planes. Bit i of UpperBits and bit i of LowerBits
 * together form the 2-bit code of the base at position i
 * (0 = A, 1 = C, 2 = G, 3 = T), as decode(int) reads them back.
 */
class CReadInBits
{
public:
    // ---- construction / destruction ----
    CReadInBits();
    CReadInBits(const char* caRead);
    CReadInBits(const char* caRead, int readlength);
    ~CReadInBits();

    // ---- data (declaration order is part of the object layout) ----
    WORD_SIZE UpperBits; // high bit of each base code
    WORD_SIZE LowerBits; // low bit of each base code
    static int iReadLength; // read length shared by every instance

    static const unsigned int MAX_READ_LENGTH_IN_BITS = 63;

    // ---- encoding ----
    unsigned int encode(const char* caRead);
    unsigned int encode(const char* caRead, int readlength);
    unsigned int encodeRead_NasA(const char* caRead, int readlength);

    // ---- decoding ----
    char* decode(char* caRead);
    // Fills the array with 0, 1, 2, 3 instead of A, C, G, T.
    int* decode(int* iaRead) const;
    // Returns 0 for A, 1 for C, 2 for G, 3 for T at the given position.
    inline WORD_SIZE decode(int basePosition);

    // ---- sub-reads ----
    inline CReadInBits getSuffixStr(unsigned int shift) const;
    inline CReadInBits getPrefixStr(unsigned int length) const;

    // ---- comparison ----
    bool operator==(const CReadInBits &other) const;
    bool operator<(const CReadInBits &other) const;
};
// Return the 2-bit code of the base at basePosition:
// 0 for A, 1 for C, 2 for G, 3 for T.
// Returns the sentinel value 5 when basePosition lies outside
// [0, CReadInBits::iReadLength).
inline WORD_SIZE CReadInBits::decode(int basePosition)
{
    // A negative position would be used as a shift count, which is undefined
    // behaviour; treat it like any other out-of-range index.
    if (basePosition < 0 || basePosition >= CReadInBits::iReadLength) {
        return 5;
    }
    const WORD_SIZE upperbit = (this->UpperBits >> basePosition) & 0x01;
    const WORD_SIZE lowerbit = (this->LowerBits >> basePosition) & 0x01;
    // The upper plane supplies the high bit of the code, the lower plane the low bit.
    return((upperbit << 0x01) + lowerbit);
}
// Return a copy of this read with the first `shift` bases dropped, i.e. both
// bit-planes shifted right by `shift`.
// When `shift` is at least the word width (wordSize) nothing is left, so both
// planes of the result are zero.
inline CReadInBits CReadInBits::getSuffixStr(unsigned int shift) const
{
    CReadInBits r;
    // Shifting by the full word width or more is undefined behaviour in C++;
    // answer the empty suffix explicitly instead.
    if (shift >= static_cast<unsigned int>(wordSize)) {
        r.UpperBits = 0;
        r.LowerBits = 0;
        return (r);
    }
    r.UpperBits = this->UpperBits >> shift;
    r.LowerBits = this->LowerBits >> shift;
    return (r);
}
// Return a copy of this read that keeps only the first `length` bases, i.e.
// the low `length` bits of both bit-planes; all higher bits are cleared.
// length == 0 yields two zero planes; length >= wordSize yields an unmasked copy.
inline CReadInBits CReadInBits::getPrefixStr(unsigned int length) const
{
    CReadInBits r;
    if (length == 0) {
        // The mask shift would equal the word width, which is undefined behaviour.
        r.UpperBits = 0;
        r.LowerBits = 0;
        return (r);
    }
    if (length >= static_cast<unsigned int>(wordSize)) {
        // Nothing to mask away; also avoids the unsigned underflow of wordSize - length.
        r.UpperBits = this->UpperBits;
        r.LowerBits = this->LowerBits;
        return (r);
    }
    // Shift the unwanted high bits out of the word and back in as zeros.
    const unsigned int maskedSuffixLength = (wordSize - length);
    r.UpperBits = this->UpperBits << maskedSuffixLength;
    r.UpperBits >>= maskedSuffixLength;
    r.LowerBits = this->LowerBits << maskedSuffixLength;
    r.LowerBits >>= maskedSuffixLength;
    return (r);
}
// Build a mask of the positions where reads A and B disagree: bit i of the
// result is set when the two reads differ at bit i in either bit-plane.
inline WORD_SIZE getDiffBits (CReadInBits A, CReadInBits B)
{
    return (A.UpperBits ^ B.UpperBits) | (A.LowerBits ^ B.LowerBits);
}
// Reverse-complement an encoded read of uiReadLength bases.
// NOTE(review): the pointer overload presumably rewrites *UpperBits / *LowerBits
// in place while the CReadInBits overload returns the result - confirm against
// the implementation, which is not visible in this header.
void reverseCompliment(unsigned int uiReadLength, WORD_SIZE* UpperBits, WORD_SIZE* LowerBits);
CReadInBits reverseCompliment(unsigned int uiReadLength, CReadInBits r);
// Calculate the difference in bits between two encoded reads.
// Earlier word-based signature, kept for reference:
// unsigned int bitsStrCompare(WORD_SIZE UpperBits1, WORD_SIZE LowerBits1, WORD_SIZE UpperBits2, WORD_SIZE LowerBits2);
unsigned int bitsStrCompare(CReadInBits r1, CReadInBits r2);
// Earlier word-based signature (compared the last N bits only), kept for reference:
// unsigned int bitsStrNCompare(WORD_SIZE UpperBits1, WORD_SIZE LowerBits1, WORD_SIZE UpperBits2, WORD_SIZE LowerBits2);
// Count the differences within the first N base pairs.
unsigned int bitsStrNCompare(CReadInBits r1, CReadInBits r2, unsigned int N);
// Skip the first M base pairs, then count the differences within the following N base pairs.
unsigned int bitsStrMNCompare(CReadInBits r1, CReadInBits r2, unsigned int M, unsigned int N);
// Return a copy of readInBits whose two bit-planes have each been passed
// through reverse64bits() and then shifted right by extraBitsNo.
// The argument itself is not modified.
// (Original hint for the shift amount: extraBitsNo = wordSize - bitStrNo - 1.)
inline CReadInBits reverseBitsSignals(CReadInBits& readInBits, int extraBitsNo)
{
    CReadInBits flipped = readInBits;

    // Lower plane: reverse, store, then drop the extra low-order bits.
    flipped.LowerBits = reverse64bits(flipped.LowerBits);
    flipped.LowerBits >>= extraBitsNo;

    // Upper plane: same treatment.
    flipped.UpperBits = reverse64bits(flipped.UpperBits);
    flipped.UpperBits >>= extraBitsNo;

    return flipped;
}
// Print msg, then the decoded expected read, then the decoded actual read,
// each on its own line. Characters of the actual read that differ from the
// expected read (within the first CReadInBits::iReadLength positions) are
// printed in lower case. Always returns 0.
inline int printBitsStrCompare(CReadInBits exp, CReadInBits actual, const char* msg)
{
    char expectedText[wordSize + 1];
    char actualText[wordSize + 1];
    exp.decode(expectedText);
    actual.decode(actualText);

    // Highlight mismatching positions by lower-casing them in the actual read.
    int pos = 0;
    while (pos < CReadInBits::iReadLength) {
        if (expectedText[pos] != actualText[pos]) {
            actualText[pos] = static_cast<char>(tolower(actualText[pos]));
        }
        ++pos;
    }

    std::cout << msg << std::endl
              << expectedText << std::endl
              << actualText << std::endl;
    return 0;
}
// Decode strInBits into a text buffer, cut the text off after `length`
// characters and print it followed by a newline. Always returns 0.
// `length` is clamped to [0, wordSize] so the terminator is always written
// inside the buffer.
inline int printBitsStr(CReadInBits strInBits, int length)
{
    char caStr[wordSize + 1];
    strInBits.decode(caStr);
    // A negative or oversized length would write the terminator outside caStr.
    if (length < 0) {
        length = 0;
    } else if (length > static_cast<int>(wordSize)) {
        length = static_cast<int>(wordSize);
    }
    caStr[length] = '\0';
    printf("%s\n", caStr);
    return(0);
}
// Print the lowest `length` bits of strInBits as a string of '0'/'1'
// characters, least-significant bit first, followed by a newline.
// `length` is clamped to [0, wordSize] so every write stays inside the buffer.
// Always returns 0.
inline int printBitsStr(WORD_SIZE strInBits, int length)
{
    char caStr[wordSize + 1];
    // A negative or oversized length would index outside caStr.
    if (length < 0) {
        length = 0;
    } else if (length > static_cast<int>(wordSize)) {
        length = static_cast<int>(wordSize);
    }
    for (int i = 0; i < length; i++) {
        caStr[i] = (strInBits & 0x01) ? '1': '0';
        strInBits >>= 0x01; // expose the next bit
    }
    caStr[length] = '\0';
    printf("%s\n", caStr);
    return(0);
}
// Left-shift strInBits by `digit` bits.
// Shift counts of 32 or more are carried out in two steps (31 bits first,
// then the remaining digit - 31) rather than as one large shift.
inline WORD_SIZE SHIFT_LEFT(WORD_SIZE strInBits, int digit)
{
    if (digit < 32) {
        return(strInBits << digit);
    }
    const WORD_SIZE firstStep = strInBits << 31;
    return(firstStep << (digit - 31));
}
// Free-function encode/decode helpers; only the prototypes are visible here.
// NOTE(review): the descriptions below are inferred from the signatures alone -
// confirm them (and the meaning of each unsigned int return value) against the
// implementations.
// Presumably encodes iReadLength characters of caRead into *encodUpperBits / *encodedLowerBits.
unsigned int encodeRead(const char* caRead, int iReadLength, WORD_SIZE* encodUpperBits, WORD_SIZE* encodedLowerBits);
// Presumably the same as encodeRead but treating the base N as A (going by the name) - TODO confirm.
unsigned int encodeReadNasA(const char* caRead, int uiReadLength, WORD_SIZE* encodUpperBits, WORD_SIZE* encodedLowerBits);
// Presumably encodes a read too long for one CReadInBits into two halves - TODO confirm.
unsigned int encodeLongRead(const char* read, CReadInBits& firstHalf, CReadInBits& secondHalf);
// Presumably decodes iReadLength bases from the two words into characters (caRead).
unsigned int decodeRead(char* caRead, int iReadLength, WORD_SIZE UpperBits, WORD_SIZE LowerBits);
// Presumably decodes iReadLength bases from the two words into integer codes (iaRead).
unsigned int decodeRead(int* iaRead, int iReadLength, WORD_SIZE UpperBits, WORD_SIZE LowerBits);
// Presumably the inverse of encodeLongRead; the role of oddReadLength is not visible here - TODO confirm.
unsigned int decodeLongRead(CReadInBits& firstHalf, CReadInBits& secondHalf, char* read, bool oddReadLength);
|