File: SoloFeature_cellFiltering.cpp

package info (click to toggle)
rna-star 2.7.11b%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,492 kB
  • sloc: cpp: 21,951; awk: 827; ansic: 457; makefile: 192; sh: 31
file content (131 lines) | stat: -rw-r--r-- 5,796 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#include "SoloFeature.h"
#include "serviceFuns.cpp"
#include <math.h>

void SoloFeature::cellFiltering()
{    

    if (pSolo.cellFilter.type[0]=="None" ||  nCB<1) {//no filtering, or no cells to filter
        return;
    };
    
    switch(featureType) {
        default:
            return; //filtering not done for other features
            
        case SoloFeatureTypes::Velocyto: {
            filteredCells.reset(nCB);
            
            //Velocyto: use filter from Gene
            SoloFeature &soFeGe= *soloFeatAll[pSolo.featureInd[SoloFeatureTypes::Gene]];
            for (uint32 ic=0; ic<soFeGe.nCB; ic++) {
                if (soFeGe.filteredCells.filtVecBool[ic] && indCBwl[soFeGe.indCB[ic]] != (uint32)-1) {
                    //this cell passde Gene filtering, and is detected in Velocyto
                    filteredCells.filtVecBool[indCBwl[soFeGe.indCB[ic]]] = true;
                };
            };
                        
            nUMIperCBsorted=nUMIperCB;
            std::sort( nUMIperCBsorted.begin(), nUMIperCBsorted.end(), [](const uint32_t &u1, const uint32_t &u2) {return u1>u2;} ); //sort descending
            break;
        };
        case SoloFeatureTypes::Gene:
        case SoloFeatureTypes::GeneFull:
        case SoloFeatureTypes::GeneFull_Ex50pAS:
        case SoloFeatureTypes::GeneFull_ExonOverIntron: 
        case -1: //undefined: matrix loaded from file
        {
            ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// cell calling (filtering)
            //do the filtering

            //simple filtering first
            nUMIperCBsorted=nUMIperCB;
            std::sort( nUMIperCBsorted.begin(), nUMIperCBsorted.end(), [](const uint32_t &u1, const uint32_t &u2) {return u1>u2;} ); //sort descending

            uint32 nUMImax=0, nUMImin=0;
            if (pSolo.cellFilter.type[0]=="TopCells") {
                nUMImin = nUMIperCBsorted[min(nCB-1,pSolo.cellFilter.topCells)];    
            } else {//other filtering types require simple filtering first
                //find robust max
                uint32 maxind=int(round(pSolo.cellFilter.knee.nExpectedCells*(1.0-pSolo.cellFilter.knee.maxPercentile)));//cell number for robust max
                nUMImax = nUMIperCBsorted[min(nCB-1,maxind)];//robust estimate of the max UMI
                nUMImin = int(round(nUMImax/pSolo.cellFilter.knee.maxMinRatio));
            };
            nUMImin=max(nUMImin,(uint32) 1);//cannot be zero
                
            filteredCells.reset(nCB); //all stats to 0

            for (uint32 icb=0; icb<nCB; icb++) {
                if (nUMIperCB[icb]>=nUMImin) {
                    filteredCells.filtVecBool[icb]=true;
                    filteredCells.nCellsSimple++;
                };
            };
            
            P.inOut->logMain << "cellFiltering: simple: nUMImax="<< nUMImax <<"; nUMImin="<< nUMImin <<"; nCellsSimple="<< filteredCells.nCellsSimple <<endl;

            if (pSolo.cellFilter.type[0]=="EmptyDrops_CR") {
                emptyDrops_CR();
            };
        };
    };
    //filtering is done: filtVecBool=true for kept cells
    
    
    //calculate filtered statistics

    std::vector<uint32_t> geneDetected(featuresNumber, 0); //=1 if a gene was detected in at least one cell

    for (uint32 icb=0; icb<nCB; icb++) {
        if (filteredCells.filtVecBool[icb]) {
            
            filteredCells.nCells++;

            filteredCells.nUMIinCells += nUMIperCB[icb]; //nUMIperCB was calculated for umiDedup-main
            
            if (nReadPerCBunique.size()>0) {//for CellFiltering only, read information is not available
                filteredCells.nReadInCells += nReadPerCBtotal[icb];
                filteredCells.nReadInCellsUnique += nReadPerCBunique[icb];            
                filteredCells.nReadPerCellUnique.push_back(nReadPerCBunique[icb]);
            };
            
            uint32 ng1 = 0; //number of genes detected in this cell
            for (uint32 ig=0; ig<nGenePerCB[icb]; ig++) {
                uint32 indG1=countCellGeneUMIindex[icb]+ig*countMatStride;
                if (countCellGeneUMI[indG1 + pSolo.umiDedup.countInd.main] > 0) {
                    geneDetected[countCellGeneUMI[indG1]] = 1; //gene is present if it's count > 0 for 
                    ng1++;
                };
            };
                        
            filteredCells.nGeneInCells += ng1; //had to recalculate this since some gene counts could be 0
            filteredCells.nGenePerCell.push_back(ng1);
        };
    };   
    
    if (filteredCells.nCells==0) {//all stats were already set to 0
        return;
    };
    
    filteredCells.nGeneDetected=0;
    for (uint32 ii=0; ii<featuresNumber; ii++) {
        if (geneDetected[ii]>0)
            filteredCells.nGeneDetected++;
    };
    
    filteredCells.meanUMIperCell = filteredCells.nUMIinCells / filteredCells.nCells;
    filteredCells.meanReadPerCellUnique = filteredCells.nReadInCellsUnique / filteredCells.nCells;
    filteredCells.meanGenePerCell = filteredCells.nGeneInCells / filteredCells.nCells;
    
    std::sort(filteredCells.nReadPerCellUnique.begin(), filteredCells.nReadPerCellUnique.end());
    std::sort(filteredCells.nGenePerCell.begin(), filteredCells.nGenePerCell.end());

    filteredCells.medianUMIperCell = nUMIperCBsorted[filteredCells.nCells/2];
    filteredCells.medianGenePerCell = filteredCells.nGenePerCell[filteredCells.nCells/2];
    filteredCells.medianReadPerCellUnique = filteredCells.nReadPerCellUnique[filteredCells.nCells/2];
    
    //////////////////////////////////////////////////////////////////output filtered matrix
    outputResults(true, outputPrefixFiltered);

    return;
};