File: SoloFeature_statsOutput.cpp

package info (click to toggle)
rna-star 2.7.8a%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 3,076 kB
  • sloc: cpp: 20,429; awk: 483; ansic: 470; makefile: 181; sh: 31
file content (80 lines) | stat: -rw-r--r-- 4,108 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#include "SoloFeature.h"
#include "streamFuns.h"
#include "Stats.h"
#include "GlobalVariables.h"

void SoloFeature::statsOutput()
{
    ofstream &strOut=ofstrOpen(outputPrefix+"Summary.csv", ERROR_OUT, P);
    //Sequencing
    strOut << "Number of Reads," << g_statsAll.readN <<'\n';
    strOut << "Reads With Valid Barcodes," << 1.0 - double( readBarSum->stats.numInvalidBarcodes() + readFeatSum->stats.numInvalidBarcodes() )/g_statsAll.readN <<'\n';
    strOut << "Sequencing Saturation," << readFeatSum->stats.numSequencingSaturation() <<'\n';
    
    if (pSolo.type != pSolo.SoloTypes::SmartSeq) {//qualHist for CB+UMI
        uint64 q30=0, ntot=0;
        for (uint32 ix=0; ix<256; ix++) {
            ntot += readBarSum->qualHist[ix];
            if (ix >= (P.readQualityScoreBase + 30))
                q30 += readBarSum->qualHist[ix];
        };
           
        strOut << "Q30 Bases in CB+UMI,"   << double(q30)/ntot <<'\n';
    };
    
    {//qualHist for RNA
        uint64 q30=0, ntot=0;
        for (int ichunk=0; ichunk<P.runThreadN; ichunk++) {
            for (uint32 imate=0; imate<P.readNmates; imate++) {
                for (uint32 ix=0; ix<256; ix++) {
                    ntot += RAchunk[ichunk]->RA->qualHist[imate][ix];
                    if (ix >= (P.readQualityScoreBase + 30))
                        q30 += RAchunk[ichunk]->RA->qualHist[imate][ix];
                };
            };
        };
        strOut << "Q30 Bases in RNA read," << double(q30)/ntot <<'\n';
    };    
    
    strOut << "Reads Mapped to Genome: Unique+Multiple," << double(g_statsAll.mappedReadsU+g_statsAll.mappedReadsM)/g_statsAll.readN <<'\n';
    strOut << "Reads Mapped to Genome: Unique," << double(g_statsAll.mappedReadsU)/g_statsAll.readN <<'\n';
    
    string mapfeat=SoloFeatureTypes::Names[featureType];
    if (featureType==SoloFeatureTypes::Gene || featureType==SoloFeatureTypes::GeneFull)
        mapfeat="Transcriptome";
    
    strOut << "Reads Mapped to "<< mapfeat << ": Unique+Multipe " << SoloFeatureTypes::Names[featureType] <<"s," << double( readFeatSum->stats.numMappedToTranscriptome() )/g_statsAll.readN <<'\n';
    strOut << "Reads Mapped to "<< mapfeat << ": Unique " << SoloFeatureTypes::Names[featureType] <<"s," << double( readFeatSum->stats.numMappedToTranscriptomeUnique() )/g_statsAll.readN <<'\n';
    
    if (pSolo.cellFilter.type[0]!="None" && (featureType==SoloFeatureTypes::Gene || featureType==SoloFeatureTypes::GeneFull)) {
        //if (pSolo.cellFilter.type[0]=="CellRanger2.2") 
        {
            strOut << "Estimated Number of Cells," << filteredCells.nCells <<'\n';

            strOut << "Reads in Cells Mapped to Unique " << SoloFeatureTypes::Names[featureType] << "s," << filteredCells.nReadInCells <<'\n';
            strOut << "Fraction of Reads in Cells," << double(filteredCells.nReadInCells) / readFeatSum->stats.numMappedToTranscriptomeUnique() <<'\n';
            strOut << "Mean Reads per Cell," << filteredCells.meanReadPerCell <<'\n';
            strOut << "Median Reads per Cell," << filteredCells.medianReadPerCell <<'\n';

            strOut << "UMIs in Cells," << filteredCells.nUMIinCells <<'\n';
            strOut << "Mean UMI per Cell," << filteredCells.meanUMIperCell <<'\n';
            strOut << "Median UMI per Cell," << filteredCells.medianUMIperCell <<'\n';    

            strOut << "Mean "   << SoloFeatureTypes::Names[featureType] << "s per Cell," << filteredCells.meanGenePerCell <<'\n';
            strOut << "Median " << SoloFeatureTypes::Names[featureType] << "s per Cell," << filteredCells.medianGenePerCell <<'\n';    
            strOut << "Total "  << SoloFeatureTypes::Names[featureType] << "s Detected," << filteredCells.nGeneDetected <<'\n';    
        };

        //output UMI per cell, sorted
        ofstream &strOutUMIperCell = ofstrOpen(outputPrefix+"UMIperCellSorted.txt", ERROR_OUT, P);

        for (auto & n : nUMIperCBsorted) {
            if (n==0)
                break;
            strOutUMIperCell << n <<'\n';
        };
        strOutUMIperCell.close();
    };
    
    strOut.close();
};