1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
|
#include "SoloFeature.h"
#include "streamFuns.h"
#include "TimeFunctions.h"
#include "SequenceFuns.h"
void SoloFeature::countCBgeneUMI()
{
time_t rawTime;
rguStride=2;
if (pSolo.readInfoYes[featureType]) {
rguStride=3; //to keep readI column
readInfo.resize(nReadsInput,{(uint64)-1,(uint32)-1});
time(&rawTime);
P.inOut->logMain << timeMonthDayTime(rawTime) << " ... Allocated and initialized readInfo array, nReadsInput = " << nReadsInput <<endl;
};
rGeneUMI = new uint32[rguStride*nReadsMapped]; //big array for all CBs - each element is gene and UMI
rCBp = new uint32*[nCB+1];
uint32 **rCBpa = new uint32*[pSolo.cbWLsize+1];
rCBp[0]=rGeneUMI;
rCBpa[0]=rGeneUMI;
nCB=0;//will count it again below
for (uint32 ii=0; ii<pSolo.cbWLsize; ii++) {
if (readFeatSum->cbReadCount[ii]>0) {
rCBp[nCB+1] = rCBp[nCB] + rguStride*readFeatSum->cbReadCount[ii];
++nCB;
};
rCBpa[ii+1]=rCBp[nCB];
};
//read and store the CB/gene/UMI from files
time(&rawTime);
P.inOut->logMain << timeMonthDayTime(rawTime) << " ... Finished allocating arrays for Solo " << nReadsMapped*rguStride*4.0/1024/1024/1024 <<" GB" <<endl;
///////////////////////////////////////////////////////////////////////////
////////////// Input records
for (int ii=0; ii<P.runThreadN; ii++) {//TODO: this can be parallelized
readFeatAll[ii]->inputRecords(rCBpa, rguStride, readBarSum->cbReadCountExact, readInfo);
};
nReadPerCB.resize(nCB);
uint32 nReadPerCBmax=0;
for (uint32 iCB=0; iCB<nCB; iCB++) {
nReadPerCB[iCB] = (rCBpa[indCB[iCB]]-rCBp[iCB])/rguStride; //number of reads that were matched to WL, rCBpa accumulated reference to the last element+1
nReadPerCBmax=max(nReadPerCBmax,nReadPerCB[iCB]);
readFeatSum->stats.V[readFeatSum->stats.nMatch] += nReadPerCB[iCB];
};
for (int ii=0; ii<P.runThreadN; ii++) {
readFeatSum->addStats(*readFeatAll[ii]);
};
time(&rawTime);
P.inOut->logMain << timeMonthDayTime(rawTime) << " ... Finished reading reads from Solo files nCB="<<nCB <<", nReadPerCBmax="<<nReadPerCBmax;
P.inOut->logMain <<", nMatch="<<readFeatSum->stats.V[readFeatSum->stats.nMatch]<<endl;
//////////////////////////////////////////////////////////////////////////////
/////////////////////////// collapse each CB
nUMIperCB.resize(nCB);
nGenePerCB.resize(nCB);
uint32 *umiArray = new uint32[nReadPerCBmax*umiArrayStride];//temp array for collapsing UMI
countMatStride=pSolo.umiDedup.yes.N + 1;
countCellGeneUMI.resize(nReadsMapped*countMatStride/5+16); //5 is heuristic, will be resized if needed
countCellGeneUMIindex.resize(nCB+1);
countCellGeneUMIindex[0]=0;
for (uint32 icb=0; icb<nCB; icb++) {//main collapse cycle
collapseUMIall(icb, umiArray);
readFeatSum->stats.V[readFeatSum->stats.nUMIs] += nUMIperCB[icb];
if (nGenePerCB[icb]>0)
++readFeatSum->stats.V[readFeatSum->stats.nCellBarcodes];
};
delete[] rGeneUMI;
//delete[] rCBp;
time(&rawTime);
P.inOut->logMain << timeMonthDayTime(rawTime) << " ... Finished collapsing UMIs" <<endl;
};
|