File: ClipCR4.cpp

package info (click to toggle)
rna-star 2.7.8a%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 3,076 kB
  • sloc: cpp: 20,429; awk: 483; ansic: 470; makefile: 181; sh: 31
file content (107 lines) | stat: -rw-r--r-- 2,750 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#include "ClipCR4.h"

ClipCR4::ClipCR4()
{
    dbN = 64;
    
    scoreMatrix = {
        1, -2, -2, -2, -2,
       -2,  1, -2, -2, -2,
       -2, -2,  1, -2, -2,
       -2, -2, -2,  1, -2,
       -2, -2, -2,  -2, 0
    };
    
    readLen = 91;
    alphabetLength = 5;
    gapOpen = 2;
    gapExt = 2;
    
    
    dbSeqArr = new uint8_t[dbN*readLen];
    dbSeqs = new uint8_t*[dbN];
    for (int id=0; id<dbN; id++) {
        dbSeqs[id]=dbSeqArr+id*readLen;
    };
    
    dbSeqsLen = new int[dbN];
    for (int id=0; id<dbN; id++) {
        dbSeqsLen[id] = readLen; //rL;
    };  
    
    storeClip.resize(dbN);
    opalRes.resize(dbN);
    opalResP = new OpalSearchResult*[dbN];
    
    for (int i = 0; i < dbN; i++) {
        opalResP[i]=&opalRes[i];
    };     

};


void ClipCR4::opalFillOneSeq(uint32 idb, char *seq, uint32 seqL)
{
    uint32 minLen = min(seqL, readLen);
    
    memcpy(dbSeqs[idb], seq, minLen);
    
    for (uint32 ib=0; ib<minLen; ib++) {//TODO: vectorize with multiplications, i.e. dbSeq= (seq=='C')+(seq=='G')*2+(seq=='T')*3+(seq=='N')*4
        switch (dbSeqs[idb][ib]) {
            case 'A':
                dbSeqs[idb][ib]=0;
                break;
            case 'C':
                dbSeqs[idb][ib]=1;
                break;        
            case 'G':
                dbSeqs[idb][ib]=2;
                break;   
            case 'T':
                dbSeqs[idb][ib]=3;
                break;   
            default:
                dbSeqs[idb][ib]=4;
        };
    };
    
    if (seqL<readLen) //fill the rest of the sequence with Ns
        memset(dbSeqs[idb]+seqL, 4, readLen-seqL);
};

void ClipCR4::opalAlign(uint8_t *query, uint32 queryLen, int dbN1)
{
    for (int idb = 0; idb < dbN1; idb++) {
        opalInitSearchResult(opalResP[idb]);
    };    
    opalSearchDatabase(query, queryLen, dbSeqs, dbN1, dbSeqsLen,
                        gapOpen, gapExt, scoreMatrix.data(), alphabetLength, opalResP,
                        OPAL_SEARCH_SCORE_END, OPAL_MODE_OV, OPAL_OVERFLOW_BUCKETS);//, OPAL_OVERFLOW_SIMPLE);//
};

uint32 ClipCR4::polyTail3p(char *seq, uint32 seqLen)
{//clip polyA tail
    //hardcoded for CR4 trimming

    uint32_t ib1=seqLen-1;
    int32_t score=0, score1=0;
    for (uint32_t ib=1; ib<=seqLen; ib++) {
        if ( seq[seqLen-ib] == 0 ) {//A-tail
            score += 1; //+1 for matches
            if (score*10 >= (int)ib*7) {//score>=0.7*clipL
                ib1 = ib;
                score1 = score;
            };
        } else {
            score -= 2; //-2 for mismatches
            if ( ib-score > 27 ) 
                break; //score drop is too big
        };
    };
    if ( score1<20 )
        ib1=0; //score is too small
        
    return ib1;
};