File: Transcriptome_quantAlign.cpp

package info (click to toggle)
rna-star 2.7.8a%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 3,076 kB
  • sloc: cpp: 20,429; awk: 483; ansic: 470; makefile: 181; sh: 31
file content (114 lines) | stat: -rwxr-xr-x 5,274 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#include "Transcriptome.h"
#include "ReadAlign.h"
#include "serviceFuns.cpp"

int alignToTranscript(Transcript &aG, uint trS1, uint8 trStr1, uint32 *exSE1, uint32 *exLenCum1, uint16 exN1, Transcript &aT) {

    //find exon that overlaps beginning of the read
    uint32 g1=aG.exons[0][EX_G]-trS1;//start of the align
    uint32 ex1=binarySearch1<uint32>(g1, exSE1, 2*exN1);
    //this sholud not be possible - we check before we call this function
    if (ex1>=2*exN1) return 0; //align start is to the right of all exons

    if (ex1%2==1) {//beginning of the read >=end of an exon
        if (exSE1[ex1]==g1) {//first base of the read is exactly the last base of the exon
            --ex1;
        } else {
            return 0;//beginning of the read is past the end of an exon, align does not belong to this transcript
        };
    };
    ex1=ex1/2; //this is the first exon of the alignment

    aT.nExons=0;
    aT.primaryFlag=false;

    aG.canonSJ[aG.nExons-1]=-999; //marks the last exons
    for (uint32 iab=0; iab<aG.nExons; iab++) {//scan through all blocks of the align
        if (aG.exons[iab][EX_G]+aG.exons[iab][EX_L]>exSE1[2*ex1+1]+trS1+1) {//block extends past exon end
            return 0;
        };
        if (iab==0 || aG.canonSJ[iab-1]<0) {
            aT.exons[aT.nExons][EX_R]=aG.exons[iab][EX_R];
            aT.exons[aT.nExons][EX_G]=aG.exons[iab][EX_G]-trS1-exSE1[2*ex1]+exLenCum1[ex1];
            aT.exons[aT.nExons][EX_L]=aG.exons[iab][EX_L];
            aT.exons[aT.nExons][EX_iFrag]=aG.exons[iab][EX_iFrag];
            if (aT.nExons>0) aT.canonSJ[aT.nExons-1]=aG.canonSJ[iab-1];
            ++aT.nExons;
        } else {
            aT.exons[aT.nExons-1][EX_L]+=aG.exons[iab][EX_L];
        };
        switch (aG.canonSJ[iab]) {
            case -999: //last exon
                if (trStr1==2) {//convert align coordinates if on the -strand
                    uint32 trlength=exLenCum1[exN1-1]+exSE1[2*exN1-1]-exSE1[2*exN1-2]+1; //transcript length
                    for (uint32 iex=0; iex<aT.nExons; iex++) {
                        aT.exons[iex][EX_R]=aG.Lread-(aT.exons[iex][EX_R]+aT.exons[iex][EX_L]);
                        aT.exons[iex][EX_G]=trlength-(aT.exons[iex][EX_G]+aT.exons[iex][EX_L]);
                    };
                    for (uint32 iex=0; iex<aT.nExons/2; iex++) {
                        swap(aT.exons[iex][EX_R],aT.exons[aT.nExons-1-iex][EX_R]);
                        swap(aT.exons[iex][EX_G],aT.exons[aT.nExons-1-iex][EX_G]);
                        swap(aT.exons[iex][EX_L],aT.exons[aT.nExons-1-iex][EX_L]);
                        swap(aT.exons[iex][EX_iFrag],aT.exons[aT.nExons-1-iex][EX_iFrag]);
                    };
                    for (uint32 iex=0; iex<(aT.nExons-1)/2; iex++) {
                        swap(aT.canonSJ[iex],aT.canonSJ[aT.nExons-2-iex]);
                    };
                };
                for (uint32 iex=0; iex<aT.nExons; iex++) {//no junctions in the transcritomic coordinates
                    aT.sjAnnot[iex]=0;
                    aT.shiftSJ[iex][0]=0;
                    aT.shiftSJ[iex][1]=0;
                    aT.sjStr[iex]=0;
                };

                return 1; //reached the end of blocks, align is consistent with this transcript
                break;
            case -3: //mate connection
                ex1=binarySearch1<uint32>(aG.exons[iab+1][EX_G]-trS1, exSE1, 2*exN1);
                if (ex1%2==1) {//beginning of the mext mate in the middle of the exon?
                    return 0; //align does not belong to this transcript
                } else {
                    ex1=ex1/2; //this is the first exon of the second mate
                };
                break;
            case -2: //insertion
                break;
            case -1: //deletion
                break;
            default://junctions
                if ( aG.exons[iab][EX_G]+aG.exons[iab][EX_L]==exSE1[2*ex1+1]+trS1+1 && aG.exons[iab+1][EX_G]==exSE1[2*(ex1+1)]+trS1 ) {
                    //junction matches transcript junction
                    ++ex1;
                } else {
                    return 0;
                };
        };
    };
    return 0; //this should not happen
};

uint32 Transcriptome::quantAlign (Transcript &aG, Transcript *aTall) {
    uint32 nAtr=0; //number of alignments to the transcriptome

    //binary search through transcript starts
    uint32 tr1=binarySearch1a<uint>(aG.exons[0][EX_G], trS, nTr);//tr1 has the maximum transcript start such that it is still <= align start
    if (tr1==(uint32) -1) return 0; //alignment outside of range of all transcripts

    uint aGend=aG.exons[aG.nExons-1][EX_G];

    ++tr1;
    do {//cycle back through all the transcripts
        --tr1;
        if (aGend<=trE[tr1]) {//this transcript contains the read
                int aStatus=alignToTranscript(aG, trS[tr1], trStr[tr1], exSE+2*trExI[tr1], exLenCum+trExI[tr1], trExN[tr1], aTall[nAtr]);
                if (aStatus==1) {//align conforms with the transcript
                    aTall[nAtr].Chr = tr1;
                    aTall[nAtr].Str = trStr[tr1]==1 ? aG.Str : 1-aG.Str; //TODO strandedness
                    ++nAtr;
                };
        };
    } while (trEmax[tr1]>=aGend && tr1>0);

    return nAtr;
};