1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
|
#include "ReadAlignChunk.h"
#include "Parameters.h"
#include "OutSJ.h"
#include <limits.h>
#include "ErrorWarning.h"
int compareUint(const void* i1, const void* i2) {//compare uint arrays
uint s1=*( (uint*)i1 );
uint s2=*( (uint*)i2 );
if (s1>s2) {
return 1;
} else if (s1<s2) {
return -1;
} else {
return 0;
};
};
void outputSJ(ReadAlignChunk** RAchunk, Parameters& P) {//collapses junctions from all therads/chunks; outputs junctions to file
Junction oneSJ(RAchunk[0]->RA->genOut);
char** sjChunks = new char* [P.runThreadN+1];
#define OUTSJ_limitScale 5
OutSJ allSJ (P.limitOutSJcollapsed*OUTSJ_limitScale,P,RAchunk[0]->RA->genOut);
if (P.outFilterBySJoutStage!=1) {//chunkOutSJ
for (int ic=0;ic<P.runThreadN;ic++) {//populate sjChunks with links to data
sjChunks[ic]=RAchunk[ic]->chunkOutSJ->data;
memset(sjChunks[ic]+RAchunk[ic]->chunkOutSJ->N*oneSJ.dataSize,255,oneSJ.dataSize);//mark the junction after last with big number
};
} else {//chunkOutSJ1
for (int ic=0;ic<P.runThreadN;ic++) {//populate sjChunks with links to data
sjChunks[ic]=RAchunk[ic]->chunkOutSJ1->data;
memset(sjChunks[ic]+RAchunk[ic]->chunkOutSJ1->N*oneSJ.dataSize,255,oneSJ.dataSize);//mark the junction after last with big number
};
};
while (true) {
int icOut=-1;//chunk from which the junction is output
for (int ic=0;ic<P.runThreadN;ic++) {//scan through all chunks, find the "smallest" junction
if ( *(uint*)(sjChunks[ic])<ULONG_MAX && (icOut==-1 ||compareSJ((void*) sjChunks[ic], (void*) sjChunks[icOut])<0 ) ) {
icOut=ic;
};
};
if (icOut<0) break; //no more junctions to output
for (int ic=0;ic<P.runThreadN;ic++) {//scan through all chunks, find the junctions equal to icOut-junction
if (ic!=icOut && compareSJ((void*) sjChunks[ic], (void*) sjChunks[icOut])==0) {
oneSJ.collapseOneSJ(sjChunks[icOut],sjChunks[ic],P);//collapse ic-junction into icOut
sjChunks[ic] += oneSJ.dataSize;//shift ic-chunk by one junction
};
};
//write out the junction
oneSJ.junctionPointer(sjChunks[icOut],0);//point to the icOut junction
//filter the junction
bool sjFilter;
sjFilter=*oneSJ.annot>0 \
|| ( ( *oneSJ.countUnique>=(uint) P.outSJfilterCountUniqueMin[(*oneSJ.motif+1)/2] \
|| (*oneSJ.countMultiple+*oneSJ.countUnique)>=(uint) P.outSJfilterCountTotalMin[(*oneSJ.motif+1)/2] )\
&& *oneSJ.overhangLeft >= (uint) P.outSJfilterOverhangMin[(*oneSJ.motif+1)/2] \
&& *oneSJ.overhangRight >= (uint) P.outSJfilterOverhangMin[(*oneSJ.motif+1)/2] \
&& ( (*oneSJ.countMultiple+*oneSJ.countUnique)>P.outSJfilterIntronMaxVsReadN.size() || *oneSJ.gap<=(uint) P.outSJfilterIntronMaxVsReadN[*oneSJ.countMultiple+*oneSJ.countUnique-1]) );
if (sjFilter) {//record the junction in all SJ
memcpy(allSJ.data+allSJ.N*oneSJ.dataSize,sjChunks[icOut],oneSJ.dataSize);
allSJ.N++;
if (allSJ.N == P.limitOutSJcollapsed*OUTSJ_limitScale ) {
ostringstream errOut;
errOut <<"EXITING because of fatal error: buffer size for SJ output is too small\n";
errOut <<"Solution: increase input parameter --limitOutSJcollapsed\n";
exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_INPUT_FILES, P);
};
};
sjChunks[icOut] += oneSJ.dataSize;//shift icOut-chunk by one junction
};
bool* sjFilter=new bool[allSJ.N];
if (P.outFilterBySJoutStage!=2) {
//filter non-canonical junctions that are close to canonical
uint* sjA = new uint [allSJ.N*3];
for (uint ii=0;ii<allSJ.N;ii++) {//scan through all junctions, filter by the donor ditance to a nearest donor, fill acceptor array
oneSJ.junctionPointer(allSJ.data,ii);
sjFilter[ii]=false;
uint x1=0, x2=-1;
if (ii>0) x1=*( (uint*)(allSJ.data+(ii-1)*oneSJ.dataSize) ); //previous junction donor
if (ii+1<allSJ.N) x2=*( (uint*)(allSJ.data+(ii+1)*oneSJ.dataSize) ); //next junction donor
uint minDist=min(*oneSJ.start-x1, x2-*oneSJ.start);
sjFilter[ii]= minDist >= (uint) P.outSJfilterDistToOtherSJmin[(*oneSJ.motif+1)/2];
sjA[ii*3]=*oneSJ.start+(uint)*oneSJ.gap;//acceptor
sjA[ii*3+1]=ii;
if (*oneSJ.annot==0) {
sjA[ii*3+2]=*oneSJ.motif;
} else {
sjA[ii*3+2]=SJ_MOTIF_SIZE+1;
};
};
qsort((void*) sjA, allSJ.N, sizeof(uint)*3, compareUint);
for (uint ii=0;ii<allSJ.N;ii++) {//
if (sjA[ii*3+2]==SJ_MOTIF_SIZE+1) {//no filtering for annotated junctions
sjFilter[sjA[ii*3+1]]=true;
} else {
uint x1=0, x2=-1;
if (ii>0) x1=sjA[ii*3-3]; //previous junction donor
if (ii+1<allSJ.N) x2=sjA[ii*3+3]; //next junction donor
uint minDist=min(sjA[ii*3]-x1, x2-sjA[ii*3]);
sjFilter[sjA[ii*3+1]] = sjFilter[sjA[ii*3+1]] && ( minDist >= (uint) P.outSJfilterDistToOtherSJmin[(sjA[ii*3+2]+1)/2] );
};
};
};
//output junctions
P.sjAll[0].reserve(allSJ.N);
P.sjAll[1].reserve(allSJ.N);
if (P.outFilterBySJoutStage!=1) {//output file
ofstream outSJfileStream((P.outFileNamePrefix+"SJ.out.tab").c_str());
ofstream outSJtmpStream((P.outFileTmp+"SJ.start_gap.tsv").c_str());
for (uint ii=0;ii<allSJ.N;ii++) {//write to file
if ( P.outFilterBySJoutStage==2 || sjFilter[ii] ) {
oneSJ.junctionPointer(allSJ.data,ii);
oneSJ.outputStream(outSJfileStream);//write to file
outSJtmpStream << *oneSJ.start <<'\t'<< *oneSJ.gap <<'\n';
P.sjAll[0].push_back(*oneSJ.start);
P.sjAll[1].push_back(*oneSJ.gap);
};
};
outSJfileStream.close();
} else {//make sjNovel array in P
P.sjNovelN=0;
for (uint ii=0;ii<allSJ.N;ii++) {//count novel junctions
if (sjFilter[ii]) {//only those passing filter
oneSJ.junctionPointer(allSJ.data,ii);
if (*oneSJ.annot==0) P.sjNovelN++;
};
};
P.sjNovelStart = new uint [P.sjNovelN];
P.sjNovelEnd = new uint [P.sjNovelN];
P.inOut->logMain <<"Detected " <<P.sjNovelN<<" novel junctions that passed filtering, will proceed to filter reads that contained unannotated junctions"<<endl;
uint isj=0;
for (uint ii=0;ii<allSJ.N;ii++) {//write to file
if (sjFilter[ii]) {
oneSJ.junctionPointer(allSJ.data,ii);
if (*oneSJ.annot==0) {//unnnotated only
P.sjNovelStart[isj]=*oneSJ.start;
P.sjNovelEnd[isj]=*oneSJ.start+(uint)(*oneSJ.gap)-1;
isj++;
};
};
};
};
};
|