File: outputSJ.cpp

package info (click to toggle)
rna-star 2.7.8a%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 3,076 kB
  • sloc: cpp: 20,429; awk: 483; ansic: 470; makefile: 181; sh: 31
file content (159 lines) | stat: -rw-r--r-- 7,239 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#include "ReadAlignChunk.h"
#include "Parameters.h"
#include "OutSJ.h"
#include <limits.h>
#include "ErrorWarning.h"

int compareUint(const void* i1, const void* i2) {//compare uint arrays
    uint s1=*( (uint*)i1 );
    uint s2=*( (uint*)i2 );

    if (s1>s2) {
        return 1;
    } else if (s1<s2) {
        return -1;
    } else {
        return 0;
    };
};

void outputSJ(ReadAlignChunk** RAchunk, Parameters& P) {//collapses junctions from all therads/chunks; outputs junctions to file

    Junction oneSJ(RAchunk[0]->RA->genOut);
    char** sjChunks = new char* [P.runThreadN+1];
    #define OUTSJ_limitScale 5
    OutSJ allSJ (P.limitOutSJcollapsed*OUTSJ_limitScale,P,RAchunk[0]->RA->genOut);

    if (P.outFilterBySJoutStage!=1) {//chunkOutSJ
        for (int ic=0;ic<P.runThreadN;ic++) {//populate sjChunks with links to data
            sjChunks[ic]=RAchunk[ic]->chunkOutSJ->data;
            memset(sjChunks[ic]+RAchunk[ic]->chunkOutSJ->N*oneSJ.dataSize,255,oneSJ.dataSize);//mark the junction after last with big number
        };
    } else {//chunkOutSJ1
        for (int ic=0;ic<P.runThreadN;ic++) {//populate sjChunks with links to data
            sjChunks[ic]=RAchunk[ic]->chunkOutSJ1->data;
            memset(sjChunks[ic]+RAchunk[ic]->chunkOutSJ1->N*oneSJ.dataSize,255,oneSJ.dataSize);//mark the junction after last with big number
        };
    };

    while (true) {
        int icOut=-1;//chunk from which the junction is output
        for (int ic=0;ic<P.runThreadN;ic++) {//scan through all chunks, find the "smallest" junction
            if ( *(uint*)(sjChunks[ic])<ULONG_MAX && (icOut==-1 ||compareSJ((void*) sjChunks[ic], (void*) sjChunks[icOut])<0 ) ) {
                    icOut=ic;
                };
        };

        if (icOut<0) break; //no more junctions to output

        for (int ic=0;ic<P.runThreadN;ic++) {//scan through all chunks, find the junctions equal to icOut-junction
            if (ic!=icOut && compareSJ((void*) sjChunks[ic], (void*) sjChunks[icOut])==0) {
                oneSJ.collapseOneSJ(sjChunks[icOut],sjChunks[ic],P);//collapse ic-junction into icOut
                sjChunks[ic] += oneSJ.dataSize;//shift ic-chunk by one junction
            };
        };

        //write out the junction
        oneSJ.junctionPointer(sjChunks[icOut],0);//point to the icOut junction
        //filter the junction
        bool sjFilter;
        sjFilter=*oneSJ.annot>0 \
                || ( ( *oneSJ.countUnique>=(uint) P.outSJfilterCountUniqueMin[(*oneSJ.motif+1)/2] \
                    || (*oneSJ.countMultiple+*oneSJ.countUnique)>=(uint) P.outSJfilterCountTotalMin[(*oneSJ.motif+1)/2] )\
                && *oneSJ.overhangLeft >= (uint) P.outSJfilterOverhangMin[(*oneSJ.motif+1)/2] \
                && *oneSJ.overhangRight >= (uint) P.outSJfilterOverhangMin[(*oneSJ.motif+1)/2] \
                && ( (*oneSJ.countMultiple+*oneSJ.countUnique)>P.outSJfilterIntronMaxVsReadN.size() || *oneSJ.gap<=(uint) P.outSJfilterIntronMaxVsReadN[*oneSJ.countMultiple+*oneSJ.countUnique-1]) );

        if (sjFilter) {//record the junction in all SJ
            memcpy(allSJ.data+allSJ.N*oneSJ.dataSize,sjChunks[icOut],oneSJ.dataSize);
            allSJ.N++;
            if (allSJ.N == P.limitOutSJcollapsed*OUTSJ_limitScale ) {
                ostringstream errOut;
                errOut <<"EXITING because of fatal error: buffer size for SJ output is too small\n";
                errOut <<"Solution: increase input parameter --limitOutSJcollapsed\n";
                exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_INPUT_FILES, P);
            };
        };

        sjChunks[icOut] += oneSJ.dataSize;//shift icOut-chunk by one junction
    };

    bool* sjFilter=new bool[allSJ.N];
    if (P.outFilterBySJoutStage!=2) {
        //filter non-canonical junctions that are close to canonical
        uint* sjA = new uint [allSJ.N*3];
        for (uint ii=0;ii<allSJ.N;ii++) {//scan through all junctions, filter by the donor ditance to a nearest donor, fill acceptor array
            oneSJ.junctionPointer(allSJ.data,ii);

            sjFilter[ii]=false;
            uint x1=0, x2=-1;
            if (ii>0)         x1=*( (uint*)(allSJ.data+(ii-1)*oneSJ.dataSize) ); //previous junction donor
            if (ii+1<allSJ.N) x2=*( (uint*)(allSJ.data+(ii+1)*oneSJ.dataSize) ); //next junction donor
            uint minDist=min(*oneSJ.start-x1, x2-*oneSJ.start);
            sjFilter[ii]= minDist >= (uint) P.outSJfilterDistToOtherSJmin[(*oneSJ.motif+1)/2];
            sjA[ii*3]=*oneSJ.start+(uint)*oneSJ.gap;//acceptor
            sjA[ii*3+1]=ii;

            if (*oneSJ.annot==0) {
                sjA[ii*3+2]=*oneSJ.motif;
            } else {
                sjA[ii*3+2]=SJ_MOTIF_SIZE+1;
            };

        };
        qsort((void*) sjA, allSJ.N, sizeof(uint)*3, compareUint);
        for (uint ii=0;ii<allSJ.N;ii++) {//
            if (sjA[ii*3+2]==SJ_MOTIF_SIZE+1) {//no filtering for annotated junctions
                sjFilter[sjA[ii*3+1]]=true;
            } else {
                uint x1=0, x2=-1;
                if (ii>0)         x1=sjA[ii*3-3]; //previous junction donor
                if (ii+1<allSJ.N) x2=sjA[ii*3+3]; //next junction donor
                uint minDist=min(sjA[ii*3]-x1, x2-sjA[ii*3]);
                sjFilter[sjA[ii*3+1]] = sjFilter[sjA[ii*3+1]] && ( minDist >= (uint) P.outSJfilterDistToOtherSJmin[(sjA[ii*3+2]+1)/2] );
            };
        };
    };

    //output junctions
    P.sjAll[0].reserve(allSJ.N);
    P.sjAll[1].reserve(allSJ.N);

    if (P.outFilterBySJoutStage!=1) {//output file
        ofstream outSJfileStream((P.outFileNamePrefix+"SJ.out.tab").c_str());
        ofstream outSJtmpStream((P.outFileTmp+"SJ.start_gap.tsv").c_str());
        for (uint ii=0;ii<allSJ.N;ii++) {//write to file
            if ( P.outFilterBySJoutStage==2 || sjFilter[ii]  ) {
                oneSJ.junctionPointer(allSJ.data,ii);
                oneSJ.outputStream(outSJfileStream);//write to file
                outSJtmpStream << *oneSJ.start <<'\t'<< *oneSJ.gap <<'\n';
                P.sjAll[0].push_back(*oneSJ.start);
                P.sjAll[1].push_back(*oneSJ.gap);
            };
        };
        outSJfileStream.close();
    } else {//make sjNovel array in P
        P.sjNovelN=0;
        for (uint ii=0;ii<allSJ.N;ii++) {//count novel junctions
            if (sjFilter[ii]) {//only those passing filter
                oneSJ.junctionPointer(allSJ.data,ii);
                if (*oneSJ.annot==0) P.sjNovelN++;
            };
        };
        P.sjNovelStart = new uint [P.sjNovelN];
        P.sjNovelEnd = new uint [P.sjNovelN];
        P.inOut->logMain <<"Detected " <<P.sjNovelN<<" novel junctions that passed filtering, will proceed to filter reads that contained unannotated junctions"<<endl;

        uint isj=0;
        for (uint ii=0;ii<allSJ.N;ii++) {//write to file
            if (sjFilter[ii]) {
                oneSJ.junctionPointer(allSJ.data,ii);
                if (*oneSJ.annot==0) {//unnnotated only
                    P.sjNovelStart[isj]=*oneSJ.start;
                    P.sjNovelEnd[isj]=*oneSJ.start+(uint)(*oneSJ.gap)-1;
                    isj++;
                };
            };
        };
    };
};