File: ChromosomeInBits.cpp

package info (click to toggle)
perm 0.4.0-8
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 976 kB
  • sloc: cpp: 13,499; makefile: 98; sh: 12
file content (112 lines) | stat: -rw-r--r-- 4,170 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#include "stdafx.h"
#include "chromosomeInBits.h"

/**
 * Default constructor: builds an empty object that owns no bit arrays.
 *
 * The destructor unconditionally delete[]s pLowerBits and pUpperBits, so both
 * must hold a well-defined value even when no chromosome was ever encoded;
 * leaving them uninitialized makes destroying a default-constructed object
 * undefined behaviour. initialization() resets the pointers to NULL
 * (delete[] on NULL is a no-op) and empties caSubstring.
 */
CChromosomeInBits::CChromosomeInBits(void)
{
    this->initialization();
    // Zero lengths make the bounds check in getSubstringInBits() reject every
    // index instead of dereferencing the NULL bit arrays.
    this->uiChrLength = 0;
    this->uiChrLengthInWordSize = 0;
}

/**
 * Destructor: frees the two encoded bit arrays.
 *
 * caChromosome is deliberately left alone: the constructor only stores the
 * pointer handed in by the caller, so this object never allocated it.
 */
CChromosomeInBits::~CChromosomeInBits(void)
{
    delete [] pUpperBits;
    delete [] pLowerBits;
}

/**
 * Encodes a chromosome into two parallel bit arrays (upper and lower bit of
 * each base), wordSize bases per WORD_SIZE element.
 *
 * @param caChromosome  Base characters to encode. The pointer is stored but
 *                      not owned (the destructor does not free it).
 * @param uiChrLength   Number of bases in caChromosome. A length of 0 yields
 *                      empty bit arrays.
 *
 * TODO: find a better way to represent non-ACGT characters. The original note
 * says such positions are still stored as 'A'; that depends on encodeRead(),
 * which is not visible here.
 */
CChromosomeInBits::CChromosomeInBits(char* caChromosome, unsigned int uiChrLength)
{
    this->caChromosome = caChromosome;
    this->uiChrLength = uiChrLength;
    // ceil(uiChrLength / wordSize). The zero case is handled explicitly because
    // (uiChrLength - 1) wraps around to a huge value for unsigned 0.
    this->uiChrLengthInWordSize = (uiChrLength == 0) ? 0 : (uiChrLength - 1) / wordSize + 1;
    this->pLowerBits = new WORD_SIZE[uiChrLengthInWordSize];
    this->pUpperBits = new WORD_SIZE[uiChrLengthInWordSize];
    // memset counts bytes, not elements: scale by the element size so the
    // whole array is cleared, not just its first uiChrLengthInWordSize bytes.
    memset(pLowerBits, 0x00, uiChrLengthInWordSize * sizeof(*pLowerBits));
    memset(pUpperBits, 0x00, uiChrLengthInWordSize * sizeof(*pUpperBits));

    if (this->uiChrLengthInWordSize == 0) {
        return;  // nothing to encode; the arrays have no element to write to
    }

    unsigned int i;
    // Every word except the last holds exactly wordSize bases.
    for (i = 0; i + 1 < this->uiChrLengthInWordSize; i++) {
        encodeRead(&caChromosome[i * wordSize], wordSize,
                   &this->pUpperBits[i], &this->pLowerBits[i]);
    }
    // The last word holds the remaining 1..wordSize bases.
    encodeRead(&caChromosome[i * wordSize], uiChrLength - wordSize * i,
               &this->pUpperBits[i], &this->pLowerBits[i]);
}

int CChromosomeInBits::initialization()
{
    this->pLowerBits = NULL;
    this->pUpperBits = NULL;
    this->caChromosome = NULL;
    caSubstring[0] = '\0';
    return (0);
}

/* Note: the chromosome encoding is NOT continuous. Bases are packed word by
 * word, so bit positions do not map onto one continuous chromosome index.
 * The first bit of each WORD represents the first base of that word's
 * section: the shifts below treat bit k of word w as chromosome position
 * w * wordSize + k, and a window that starts mid-word is stitched together
 * from two adjacent words.
 *
 * Returns the wordSize-base window starting at uiGenomeIndex, with the base
 * at uiGenomeIndex in bit 0 of UpperBits/LowerBits. Positions past the last
 * encoded word are returned as zero bits. If uiGenomeIndex lies beyond the
 * encoded words, a warning is printed and an all-zero window is returned.
 */
CReadInBits CChromosomeInBits::getSubstringInBits(unsigned int uiGenomeIndex)
{
    CReadInBits r;
    unsigned int indexInWords = uiGenomeIndex / wordSize;
    unsigned int bitsShifts = uiGenomeIndex % wordSize;
    if (this->uiChrLengthInWordSize > indexInWords) {
        r.UpperBits = this->pUpperBits[indexInWords] >> bitsShifts;
        r.LowerBits = this->pLowerBits[indexInWords] >> bitsShifts;
        // Borrow the high part of the window from the next word, but only if
        // that word exists: reading index + 1 at the last word would run off
        // the end of the arrays.
        if (bitsShifts != 0 && indexInWords + 1 < this->uiChrLengthInWordSize) {
            r.UpperBits |= (this->pUpperBits[indexInWords + 1] << (wordSize - bitsShifts));
            r.LowerBits |= (this->pLowerBits[indexInWords + 1] << (wordSize - bitsShifts));
        }
    } else {
        cout << "Warning, wrong chromosome index " << endl;
        // Return a defined value instead of whatever the fields happened to hold.
        r.UpperBits = 0;
        r.LowerBits = 0;
    }
    return (r);
}

/* Same as getSubstringInBits(uiGenomeIndex), but the tail bits that fall
 * outside the requested read length are cleared.
 *
 * uiGenomeIndex      position of the first base of the window.
 * uiSubstringLength  number of bases to keep; values >= wordSize keep the
 *                    whole window, 0 yields an all-zero result.
 *
 * If uiGenomeIndex lies beyond the encoded words, a warning is printed and an
 * all-zero window is returned (same policy as the one-argument overload).
 */
CReadInBits CChromosomeInBits::getSubstringInBits(unsigned int uiGenomeIndex, unsigned int uiSubstringLength)
{
    CReadInBits r;
    unsigned int indexInWords = uiGenomeIndex / wordSize;
    unsigned int bitsShifts = uiGenomeIndex % wordSize;
    if (indexInWords >= this->uiChrLengthInWordSize) {
        // Without this check the array reads below would be out of bounds.
        cout << "Warning, wrong chromosome index " << endl;
        r.UpperBits = 0;
        r.LowerBits = 0;
        return (r);
    }
    r.UpperBits = this->pUpperBits[indexInWords] >> bitsShifts;
    r.LowerBits = this->pLowerBits[indexInWords] >> bitsShifts;
    // Only stitch in the next word when it exists; at the last word the
    // missing positions simply stay zero.
    if (bitsShifts != 0 && indexInWords + 1 < this->uiChrLengthInWordSize) {
        r.UpperBits |= (this->pUpperBits[indexInWords + 1] << (wordSize - bitsShifts));
        r.LowerBits |= (this->pLowerBits[indexInWords + 1] << (wordSize - bitsShifts));
    }
    if (uiSubstringLength == 0) {
        // Shifting by the full word width is undefined, so clear explicitly.
        r.UpperBits = 0;
        r.LowerBits = 0;
    } else if (uiSubstringLength < wordSize) {
        // Shift the unwanted high bits out and back in as zeros.
        unsigned int eliminatedBitsNo = wordSize - uiSubstringLength;
        r.UpperBits <<= eliminatedBitsNo;
        r.LowerBits <<= eliminatedBitsNo;
        r.UpperBits >>= eliminatedBitsNo;
        r.LowerBits >>= eliminatedBitsNo;
    }
    // uiSubstringLength >= wordSize: nothing to trim (and the subtraction
    // above would wrap around for lengths greater than wordSize).
    return (r);
}


char* CChromosomeInBits::getSubstring(unsigned int uiGenomeIndex)
{
    CReadInBits r = getSubstringInBits(uiGenomeIndex);
    decodeRead(caSubstring, wordSize, r.UpperBits, r.LowerBits);
    return (caSubstring);
}

/**
 * Decodes up to uiSubstringLength bases starting at uiGenomeIndex into text.
 *
 * The result is limited to the shortest of: the requested length, wordSize
 * (a single window is decoded), and the number of bases left before the end
 * of the chromosome.
 *
 * @param uiGenomeIndex      position of the first base.
 * @param uiSubstringLength  requested number of bases.
 * @return pointer to the member buffer caSubstring, which is overwritten by
 *         every call. An empty string is returned, with a warning printed,
 *         when the request starts at or past the end of the chromosome.
 */
char* CChromosomeInBits::getSubstring(unsigned int uiGenomeIndex, unsigned int uiSubstringLength)
{
    CReadInBits r = getSubstringInBits(uiGenomeIndex);
    decodeRead(caSubstring, wordSize, r.UpperBits, r.LowerBits);

    // Never place the terminator beyond index wordSize: only wordSize
    // characters were decoded, and caSubstring is presumably sized for
    // exactly that plus the terminator (declaration not visible here).
    unsigned int uiVisibleLength = uiSubstringLength;
    if (uiVisibleLength > wordSize) {
        uiVisibleLength = wordSize;
    }

    if (uiGenomeIndex >= uiChrLength) {
        // A zero-length request exactly at the end is silently empty;
        // anything else starting here is a wrong index.
        if (uiGenomeIndex > uiChrLength || uiSubstringLength != 0) {
            cout << "Warning, Wrong genome index " << endl;
        }
        uiVisibleLength = 0;
    } else if (uiVisibleLength > uiChrLength - uiGenomeIndex) {
        // Comparing against the remaining length (instead of adding index and
        // length) avoids unsigned overflow of the sum.
        uiVisibleLength = uiChrLength - uiGenomeIndex;
    }

    caSubstring[uiVisibleLength] = '\0';
    return (caSubstring);
}