1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292
|
#include "ReadAlign.h"
#include "GlobalVariables.h"
#include "ErrorWarning.h"
void ReadAlign::outputAlignments() {
if (mapGen.pGe.gType==101) {//temporary
ReadAlign::spliceGraphWriteSAM();
return;
};
ReadAlign::outFilterBySJout();//sets outFilterBySJoutPass=false if read is held for the 2nd stage of outFilterBySJout
if (outFilterBySJoutPass) {//otherwise align is held for the 2nd stage of outFilterBySJout
////////////////////////////////////
if (unmapType<0) {//passed mappedFilter. Unmapped reads can have nTr>0 - "too many loci"
if (nTr>1) {//multimappers
statsRA.mappedReadsM++;
unmapType = -2; //not sure if this used
} else if (nTr==1) {//unique mappers
statsRA.mappedReadsU++;
statsRA.transcriptStats(*(trMult[0]),Lread);
};
if (!P.pGe.transform.outSAM) {
ReadAlign::recordSJ(nTr, trMult, chunkOutSJ); //this will set mateMapped
} else {
ReadAlign::recordSJ(alignsGenOut.alN, alignsGenOut.alMult, chunkOutSJ);
};
ReadAlign::alignedAnnotation();
};
//the operations below are both for mapped and unmapped reads
soloRead->readBar->getCBandUMI(Read0, Qual0, readLengthOriginal, readNameExtra[0], readFilesIndex, readName);
soloRead->record((unmapType<0 ? nTr : 0), trMult, iReadAll, readAnnot); //need to supply nTr=0 for unmapped reads
if (!P.pGe.transform.outSAM) {
ReadAlign::writeSAM(nTr, trMult, trBest); //this will set mateMapped
} else {
ReadAlign::writeSAM(alignsGenOut.alN, alignsGenOut.alMult, alignsGenOut.alBest);
};
};
if (unmapType>=0) {//unmapped reads
statsRA.unmappedAll++; //include unmapType==4, i.e. one-mate alignments of PE reads - which may have been set in writeSAM above
ReadAlign::outReadsUnmapped(); //uses mateMapped that was set in writeSAM above
};
};
///////////////////////////////////////////////////////////////////////////////////////////////////
void ReadAlign::recordSJ(uint64 nTrO, Transcript **trO, OutSJ *cSJ)
{//junction output for mapped reads (i.e. passed BySJout filtering)
if ( P.outSJfilterReads=="All" || nTrO==1 ) {
uint64 sjReadStartN=cSJ->N;
for (uint64 iTr=0; iTr<nTrO; iTr++) {//write all transcripts junctions
outputTranscriptSJ (*(trO[iTr]), nTrO, cSJ, sjReadStartN);
};
};
};
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
void ReadAlign::outFilterBySJout()
{//filtering by SJout
outFilterBySJoutPass=true;//only false if the alignment is held for outFilterBySJoutStage. True even if unmapped
if (unmapType>0 || P.outFilterBySJoutStage!=1)
return; //unmapped, or 2nd stage
for (uint iTr=0;iTr<nTr;iTr++) {//check transcript for unannotated junctions
for (uint iex=0;iex<trMult[iTr]->nExons-1;iex++) {//check all junctions
if (trMult[iTr]->canonSJ[iex]>=0 && trMult[iTr]->sjAnnot[iex]==0) {
outFilterBySJoutPass=false;
break;
};
};
if (!outFilterBySJoutPass)
break;
};
if (!outFilterBySJoutPass) {//this read is held for further filtering BySJout, record fastq
unmapType=-3; //the read is not conisdered mapped
statsRA.readN--;
statsRA.readBases -= readLength[0]+readLength[1];
for (uint im=0;im<P.readNends;im++) {
chunkOutFilterBySJoutFiles[im] << readNameMates[im] <<" "<< iReadAll <<" "<< readFilter <<" "<< readFilesIndex;
if (!readNameExtra[im].empty())
chunkOutFilterBySJoutFiles[im]<<" "<< readNameExtra[im];
chunkOutFilterBySJoutFiles[im] <<"\n";
chunkOutFilterBySJoutFiles[im] << Read0[im] <<"\n";
if (readFileType==2) {//fastq
chunkOutFilterBySJoutFiles[im] << "+\n";
chunkOutFilterBySJoutFiles[im] << Qual0[im] <<"\n";
};
};
};
//SJ output for all reads, including those not passed bySJout filtering. This only needs to be at the 1st stage of BySJout filtering
ReadAlign::recordSJ(nTr, trMult, chunkOutSJ1); //this will set mateMapped
};
////////////////////////////////////////////////////////////////////////////////////////////////
void ReadAlign::writeSAM(uint64 nTrOutSAM, Transcript **trOutSAM, Transcript *trBestSAM)
{
outBAMbytes=0;
mateMapped[0] = mateMapped[1] = false; //mateMapped = are mates present in any of the transcripts?
if (unmapType < 0 && outFilterBySJoutPass) {//write to SAM/BAM
//////////////////////////////////////////////////////////////////////////////////
/////////////outSAMfilter
if (P.outSAMfilter.yes) {
if (P.outSAMfilter.KeepOnlyAddedReferences) {
for (uint itr=0;itr<nTrOutSAM;itr++) {//check if transcripts map to chr other than added references
if (trOutSAM[itr]->Chr<mapGen.genomeInsertChrIndFirst) {
return;//no SAM output
};
};
} else if (P.outSAMfilter.KeepAllAddedReferences) {
uint64 nTrOutSAM1=0;
for (uint itr=0;itr<nTrOutSAM;itr++) {//check if transcripts map to chr other than added references
if (trOutSAM[itr]->Chr>=mapGen.genomeInsertChrIndFirst) {
trOutSAM[nTrOutSAM1]=trOutSAM[itr];
trOutSAM[nTrOutSAM1]->primaryFlag=false;
++nTrOutSAM1;
};
};
if (nTrOutSAM1==0) {
return;//no SAM output
} else {
trOutSAM[0]->primaryFlag=true;
};
nTrOutSAM = nTrOutSAM1;
};
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////
////// write SAM/BAM
nTrOutSAM=min(P.outSAMmultNmax,nTrOutSAM); //number of aligns to write to SAM/BAM files
for (uint iTr=0;iTr<nTrOutSAM;iTr++) {//write all transcripts
//mateMapped1 = true if a mate is present in this transcript
bool mateMapped1[2]={false,false};
mateMapped1[trOutSAM[iTr]->exons[0][EX_iFrag]]=true;
mateMapped1[trOutSAM[iTr]->exons[trOutSAM[iTr]->nExons-1][EX_iFrag]]=true;
if (P.outSAMbool) {//SAM output
outBAMbytes+=outputTranscriptSAM(*(trOutSAM[iTr]), nTr, iTr, (uint) -1, (uint) -1, 0, -1, NULL, outSAMstream);
if (P.outSAMunmapped.keepPairs && P.readNmates>1 && ( !mateMapped1[0] || !mateMapped1[1] ) ) {//keep pairs && paired reads && one of the mates not mapped in this transcript //not readNends: this is alignment
outBAMbytes+= outputTranscriptSAM(*(trOutSAM[iTr]), 0, 0, (uint) -1, (uint) -1, 0, 4, mateMapped1, outSAMstream);
};
};
if (P.outBAMunsorted || P.outBAMcoord) {//BAM output
alignBAM(*(trOutSAM[iTr]), nTrOutSAM, iTr, mapGen.chrStart[trOutSAM[iTr]->Chr], (uint) -1, (uint) -1, 0, -1, NULL, P.outSAMattrOrder,outBAMoneAlign, outBAMoneAlignNbytes);
if (P.outBAMunsorted) {//unsorted
for (uint imate=0; imate<P.readNmates; imate++) {//output each mate //not readNends: this is alignment
outBAMunsorted->unsortedOneAlign(outBAMoneAlign[imate], outBAMoneAlignNbytes[imate], (imate>0 || iTr>0) ? 0 : (outBAMoneAlignNbytes[0]+outBAMoneAlignNbytes[1])*2*nTrOutSAM);
};
if (P.outSAMunmapped.keepPairs && P.readNmates>1 && ( !mateMapped1[0] || !mateMapped1[1] ) ) {//keep pairs && paired reads && one of the mates not mapped in this transcript //not readNends: this is alignment
alignBAM(*trOutSAM[iTr], 0, 0, mapGen.chrStart[trOutSAM[iTr]->Chr], (uint) -1, (uint) -1, 0, 4, mateMapped1, P.outSAMattrOrder, outBAMoneAlign, outBAMoneAlignNbytes);
for (uint imate=0; imate<P.readNmates; imate++) {//output each mate //not readNends: this is alignment
outBAMunsorted->unsortedOneAlign(outBAMoneAlign[imate], outBAMoneAlignNbytes[imate], (imate>0 || iTr>0) ? 0 : (outBAMoneAlignNbytes[0]+outBAMoneAlignNbytes[1])*2*nTrOutSAM);
};
};
};
if (P.outBAMcoord) {//coordinate sorted
for (uint imate=0; imate<P.readNmates; imate++) {//output each mate //not readNends: this is alignment
outBAMcoord->coordOneAlign(outBAMoneAlign[imate], outBAMoneAlignNbytes[imate], (iReadAll<<32) | (iTr<<8) | trOutSAM[iTr]->exons[0][EX_iFrag] );
};
};
};
};
/////////////////////////////////////////////////////////////////////////////////////////////
//////// write unmapped ends
//TODO it's better to check all transcripts in the loop above for presence of both mates
mateMapped[trBestSAM->exons[0][EX_iFrag]] = true;
mateMapped[trBestSAM->exons[trBestSAM->nExons-1][EX_iFrag]] = true;
if (P.readNmates>1 && !(mateMapped[0] && mateMapped[1]) ) {//not readNends: this is alignment
unmapType=4;
};
if (unmapType==4 && P.outSAMunmapped.within) {//output unmapped ends for single-end alignments of PE reads
if (P.outSAMbool && !P.outSAMunmapped.keepPairs ) {
outBAMbytes+= outputTranscriptSAM(*trBestSAM, 0, 0, (uint) -1, (uint) -1, 0, unmapType, mateMapped, outSAMstream);
};
if ( P.outBAMcoord || (P.outBAMunsorted && !P.outSAMunmapped.keepPairs) ) {//BAM output
alignBAM(*trBestSAM, 0, 0, mapGen.chrStart[trBestSAM->Chr], (uint) -1, (uint) -1, 0, unmapType, mateMapped, P.outSAMattrOrder, outBAMoneAlign, outBAMoneAlignNbytes);
for (uint imate=0; imate<P.readNmates; imate++) {//alignBAM output is empty for mapped mate, but still need to scan through it //not readNends: this is alignment
if (P.outBAMunsorted && !P.outSAMunmapped.keepPairs) {
outBAMunsorted->unsortedOneAlign(outBAMoneAlign[imate], outBAMoneAlignNbytes[imate], imate>0 ? 0 : outBAMoneAlignNbytes[0]+outBAMoneAlignNbytes[1]);
};
if (P.outBAMcoord) {//KeepPairs option does not affect for sorted BAM since we do not want multiple entries for the same unmapped read
outBAMcoord->coordOneAlign(outBAMoneAlign[imate], outBAMoneAlignNbytes[imate], iReadAll<<32);
};
};
};
};
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////write completely unmapped
} else if (unmapType>=0 && P.outSAMunmapped.within) {//output unmapped within && unmapped read && both mates unmapped
if (P.outBAMcoord || P.outBAMunsorted || P.quant.trSAM.bamYes) {//BAM output
alignBAM(*trBestSAM, 0, 0, mapGen.chrStart[trBestSAM->Chr], (uint) -1, (uint) -1, 0, unmapType, mateMapped, P.outSAMattrOrder, outBAMoneAlign, outBAMoneAlignNbytes);
for (uint imate=0; imate<P.readNmates; imate++) {//output each mate //not readNends: this is alignment
if (P.outBAMunsorted) {
outBAMunsorted->unsortedOneAlign(outBAMoneAlign[imate], outBAMoneAlignNbytes[imate], imate>0 ? 0 : outBAMoneAlignNbytes[0]+outBAMoneAlignNbytes[1]);
};
if (P.quant.trSAM.bamYes) {
outBAMquant->unsortedOneAlign(outBAMoneAlign[imate], outBAMoneAlignNbytes[imate], imate>0 ? 0 : outBAMoneAlignNbytes[0]+outBAMoneAlignNbytes[1]);
};
if (P.outBAMcoord) {
outBAMcoord->coordOneAlign(outBAMoneAlign[imate], outBAMoneAlignNbytes[imate], iReadAll<<32);
};
};
};
if (P.outSAMbool) {//output SAM
outBAMbytes+= outputTranscriptSAM(*trBestSAM, 0, 0, (uint) -1, (uint) -1, 0, unmapType, mateMapped, outSAMstream);
};
};
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
void ReadAlign::outReadsUnmapped()
{
if (P.outReadsUnmapped=="Fastx" ) {//output to fasta/q files. Include unmapType==4, i.e. one-mate alignments of PE reads
for (uint im=0;im<P.readNends;im++) {
chunkOutUnmappedReadsStream[im] << readNameMates[im] <<" "<<im<<":"<< readFilter <<": "<< readNameExtra[im];
if (P.readNmates>1) //not readNends: this is alignment
chunkOutUnmappedReadsStream[im] <<" "<< int(mateMapped[0]) << int(mateMapped[1]);
chunkOutUnmappedReadsStream[im] <<"\n";
chunkOutUnmappedReadsStream[im] << Read0[im] <<"\n";
if (readFileType==2) {//fastq
chunkOutUnmappedReadsStream[im] << "+\n";
chunkOutUnmappedReadsStream[im] << Qual0[im] <<"\n";
};
};
};
};
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
void ReadAlign::spliceGraphWriteSAM()
{//temporary: SAM output for SpliceGraph
outBAMbytes=0;
uint64 nTrOutSAM = nTr;
if (mapGen.genomeOut.convYes) {//convert to new genome
nTrOutSAM=0;
for (uint32 iTr=0; iTr<nTrOutSAM; iTr++) {//convert output transcripts into new genome
*alignsGenOut.alMult[nTrOutSAM] = *trMult[iTr];//copy information before conversion
if (trMult[iTr]->convertGenomeCigar(*mapGen.genomeOut.g, *alignsGenOut.alMult[nTrOutSAM])) {
++nTrOutSAM;
trMult[nTrOutSAM-1] = alignsGenOut.alMult[nTrOutSAM-1]; //point to new transcsript
};
};
nTrOutSAM=nTrOutSAM;
};
for (uint iTr=0; iTr<nTrOutSAM; iTr++) {//write all transcripts
outBAMbytes += outputSpliceGraphSAM(*(trMult[iTr]), nTrOutSAM, iTr, outSAMstream);
};
};
void ReadAlign::alignedAnnotation()
{
//TODO maybe initialize readAnnot to all empty?
//genes
if ( P.quant.geCount.yes ) {
chunkTr->geneCountsAddAlign(nTr, trMult, readAnnot.geneExonOverlap);
};
//solo-GeneFull
if ( P.quant.geneFull.yes ) {
chunkTr->geneFullAlignOverlap(nTr, trMult, P.pSolo.strand, readAnnot);
};
//solo-Gene
if ( P.quant.gene.yes ) {
chunkTr->classifyAlign(trMult, nTr, readAnnot);
};
//transcripts
if ( P.quant.trSAM.yes ) {
quantTranscriptome(chunkTr, nTr, trMult, alignTrAll);
};
};
|