1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
|
#include "SoloFeature.h"
#include "serviceFuns.cpp"
#include <math.h>
void SoloFeature::cellFiltering()
{
if (pSolo.cellFilter.type[0]=="None" || nCB<1) {//no filtering, or no cells to filter
return;
};
switch(featureType) {
default:
return; //filtering not done for other features
case SoloFeatureTypes::Velocyto: {
filteredCells.reset(nCB);
//Velocyto: use filter from Gene
SoloFeature &soFeGe= *soloFeatAll[pSolo.featureInd[SoloFeatureTypes::Gene]];
for (uint32 ic=0; ic<soFeGe.nCB; ic++) {
if (soFeGe.filteredCells.filtVecBool[ic] && indCBwl[soFeGe.indCB[ic]] != (uint32)-1) {
//this cell passde Gene filtering, and is detected in Velocyto
filteredCells.filtVecBool[indCBwl[soFeGe.indCB[ic]]] = true;
};
};
nUMIperCBsorted=nUMIperCB;
std::sort( nUMIperCBsorted.begin(), nUMIperCBsorted.end(), [](const uint32_t &u1, const uint32_t &u2) {return u1>u2;} ); //sort descending
break;
};
case SoloFeatureTypes::Gene:
case SoloFeatureTypes::GeneFull:
case -1: {//undefined: matrix loaded from file
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// cell calling (filtering)
//do the filtering
//simple filtering first
nUMIperCBsorted=nUMIperCB;
std::sort( nUMIperCBsorted.begin(), nUMIperCBsorted.end(), [](const uint32_t &u1, const uint32_t &u2) {return u1>u2;} ); //sort descending
uint32 nUMImax=0, nUMImin=0;
if (pSolo.cellFilter.type[0]=="TopCells") {
nUMImin = nUMIperCBsorted[min(nCB-1,pSolo.cellFilter.topCells)];
} else {//other filtering types require simple filtering first
//find robust max
uint32 maxind=int(round(pSolo.cellFilter.knee.nExpectedCells*(1.0-pSolo.cellFilter.knee.maxPercentile)));//cell number for robust max
nUMImax = nUMIperCBsorted[min(nCB-1,maxind)];//robust estimate of the max UMI
nUMImin = int(round(nUMImax/pSolo.cellFilter.knee.maxMinRatio));
};
nUMImin=max(nUMImin,(uint32) 1);//cannot be zero
filteredCells.reset(nCB); //all stats to 0
for (uint32 icb=0; icb<nCB; icb++) {
if (nUMIperCB[icb]>=nUMImin) {
filteredCells.filtVecBool[icb]=true;
filteredCells.nCellsSimple++;
};
};
P.inOut->logMain << "cellFiltering: simple: nUMImax="<< nUMImax <<"; nUMImin="<< nUMImin <<"; nCellsSimple="<< filteredCells.nCellsSimple <<endl;
if (pSolo.cellFilter.type[0]=="EmptyDrops_CR") {
emptyDrops_CR();
};
};
};
//filtering is done: filtVecBool=true for kept cells
//calculate filtered statistics
bool *geneDetected = new bool[featuresNumber]; //=true if a gene was detected in at least one cell
memset((void*) geneDetected, 0, featuresNumber);
for (uint32 icb=0; icb<nCB; icb++) {
if (filteredCells.filtVecBool[icb]) {
filteredCells.nCells++;
filteredCells.nUMIinCells += nUMIperCB[icb]; //nUMIperCB was calculated for umiDedup-main
filteredCells.nReadInCells += nReadPerCB[icb];
filteredCells.nReadPerCell.push_back(nReadPerCB[icb]);
uint32 ng1 = 0; //number of genes detected in this cell
for (uint32 ig=0; ig<nGenePerCB[icb]; ig++) {
uint32 indG1=countCellGeneUMIindex[icb]+ig*countMatStride;
if (countCellGeneUMI[indG1 + pSolo.umiDedup.countInd.main] > 0) {
geneDetected[countCellGeneUMI[indG1]] = true; //gene is present if it's count > 0 for
ng1++;
};
};
filteredCells.nGeneInCells += ng1; //had to recalculate this since some gene counts could be 0
filteredCells.nGenePerCell.push_back(ng1);
};
};
if (filteredCells.nCells==0) {//all stats were already set to 0
return;
};
filteredCells.nGeneDetected=0;
for (uint32 ii=0; ii<featuresNumber; ii++) {
if (geneDetected[ii])
filteredCells.nGeneDetected++;
};
filteredCells.meanUMIperCell = filteredCells.nUMIinCells / filteredCells.nCells;
filteredCells.meanReadPerCell = filteredCells.nReadInCells / filteredCells.nCells;
filteredCells.meanGenePerCell = filteredCells.nGeneInCells / filteredCells.nCells;
sort(filteredCells.nReadPerCell.begin(), filteredCells.nReadPerCell.end());
sort(filteredCells.nGenePerCell.begin(), filteredCells.nGenePerCell.end());
filteredCells.medianUMIperCell = nUMIperCBsorted[filteredCells.nCells/2];
filteredCells.medianGenePerCell = filteredCells.nGenePerCell[filteredCells.nCells/2];
filteredCells.medianReadPerCell = filteredCells.nReadPerCell[filteredCells.nCells/2];
//////////////////////////////////////////////////////////////////output filtered matrix
outputResults(true, outputPrefixFiltered);
return;
};
|