File: SoloFeature_cellFiltering.cpp

package info (click to toggle)
rna-star 2.7.8a%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 3,076 kB
  • sloc: cpp: 20,429; awk: 483; ansic: 470; makefile: 181; sh: 31
file content (127 lines) | stat: -rw-r--r-- 5,454 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#include "SoloFeature.h"
#include "serviceFuns.cpp"
#include <math.h>

void SoloFeature::cellFiltering()
{    

    if (pSolo.cellFilter.type[0]=="None" ||  nCB<1) {//no filtering, or no cells to filter
        return;
    };
    
    switch(featureType) {
        default:
            return; //filtering not done for other features
            
        case SoloFeatureTypes::Velocyto: {
            filteredCells.reset(nCB);
            
            //Velocyto: use filter from Gene
            SoloFeature &soFeGe= *soloFeatAll[pSolo.featureInd[SoloFeatureTypes::Gene]];
            for (uint32 ic=0; ic<soFeGe.nCB; ic++) {
                if (soFeGe.filteredCells.filtVecBool[ic] && indCBwl[soFeGe.indCB[ic]] != (uint32)-1) {
                    //this cell passde Gene filtering, and is detected in Velocyto
                    filteredCells.filtVecBool[indCBwl[soFeGe.indCB[ic]]] = true;
                };
            };
                        
            nUMIperCBsorted=nUMIperCB;
            std::sort( nUMIperCBsorted.begin(), nUMIperCBsorted.end(), [](const uint32_t &u1, const uint32_t &u2) {return u1>u2;} ); //sort descending
            break;
        };
        case SoloFeatureTypes::Gene:
        case SoloFeatureTypes::GeneFull:
        case -1: {//undefined: matrix loaded from file

            ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// cell calling (filtering)
            //do the filtering

            //simple filtering first
            nUMIperCBsorted=nUMIperCB;
            std::sort( nUMIperCBsorted.begin(), nUMIperCBsorted.end(), [](const uint32_t &u1, const uint32_t &u2) {return u1>u2;} ); //sort descending

            uint32 nUMImax=0, nUMImin=0;
            if (pSolo.cellFilter.type[0]=="TopCells") {
                nUMImin = nUMIperCBsorted[min(nCB-1,pSolo.cellFilter.topCells)];    
            } else {//other filtering types require simple filtering first
                //find robust max
                uint32 maxind=int(round(pSolo.cellFilter.knee.nExpectedCells*(1.0-pSolo.cellFilter.knee.maxPercentile)));//cell number for robust max
                nUMImax = nUMIperCBsorted[min(nCB-1,maxind)];//robust estimate of the max UMI
                nUMImin = int(round(nUMImax/pSolo.cellFilter.knee.maxMinRatio));
            };
            nUMImin=max(nUMImin,(uint32) 1);//cannot be zero
                
            filteredCells.reset(nCB); //all stats to 0

            for (uint32 icb=0; icb<nCB; icb++) {
                if (nUMIperCB[icb]>=nUMImin) {
                    filteredCells.filtVecBool[icb]=true;
                    filteredCells.nCellsSimple++;
                };
            };
            
            P.inOut->logMain << "cellFiltering: simple: nUMImax="<< nUMImax <<"; nUMImin="<< nUMImin <<"; nCellsSimple="<< filteredCells.nCellsSimple <<endl;

            if (pSolo.cellFilter.type[0]=="EmptyDrops_CR") {
                emptyDrops_CR();
            };
        };
    };
    //filtering is done: filtVecBool=true for kept cells
    
    
    //calculate filtered statistics

    bool *geneDetected = new bool[featuresNumber]; //=true if a gene was detected in at least one cell
    memset((void*) geneDetected, 0, featuresNumber);

    for (uint32 icb=0; icb<nCB; icb++) {
        if (filteredCells.filtVecBool[icb]) {
            
            filteredCells.nCells++;

            filteredCells.nUMIinCells += nUMIperCB[icb]; //nUMIperCB was calculated for umiDedup-main
            
            filteredCells.nReadInCells += nReadPerCB[icb];
            filteredCells.nReadPerCell.push_back(nReadPerCB[icb]);
            
            uint32 ng1 = 0; //number of genes detected in this cell
            for (uint32 ig=0; ig<nGenePerCB[icb]; ig++) {
                uint32 indG1=countCellGeneUMIindex[icb]+ig*countMatStride;
                if (countCellGeneUMI[indG1 + pSolo.umiDedup.countInd.main] > 0) {
                    geneDetected[countCellGeneUMI[indG1]] = true; //gene is present if it's count > 0 for 
                    ng1++;
                };
            };
                        
            filteredCells.nGeneInCells += ng1; //had to recalculate this since some gene counts could be 0
            filteredCells.nGenePerCell.push_back(ng1);
        };
    };   
    
    if (filteredCells.nCells==0) {//all stats were already set to 0
        return;
    };
    
    filteredCells.nGeneDetected=0;
    for (uint32 ii=0; ii<featuresNumber; ii++) {
        if (geneDetected[ii])
            filteredCells.nGeneDetected++;
    };
    
    filteredCells.meanUMIperCell = filteredCells.nUMIinCells / filteredCells.nCells;
    filteredCells.meanReadPerCell = filteredCells.nReadInCells / filteredCells.nCells;
    filteredCells.meanGenePerCell = filteredCells.nGeneInCells / filteredCells.nCells;
    
    sort(filteredCells.nReadPerCell.begin(), filteredCells.nReadPerCell.end());
    sort(filteredCells.nGenePerCell.begin(), filteredCells.nGenePerCell.end());

    filteredCells.medianUMIperCell = nUMIperCBsorted[filteredCells.nCells/2];
    filteredCells.medianGenePerCell = filteredCells.nGenePerCell[filteredCells.nCells/2];
    filteredCells.medianReadPerCell = filteredCells.nReadPerCell[filteredCells.nCells/2];
    
    //////////////////////////////////////////////////////////////////output filtered matrix
    outputResults(true, outputPrefixFiltered);

    return;
};