File: SoloFeature_sumThreads.cpp

package info (click to toggle)
rna-star 2.7.8a%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 3,076 kB
  • sloc: cpp: 20,429; awk: 483; ansic: 470; makefile: 181; sh: 31
file content (90 lines) | stat: -rw-r--r-- 3,290 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#include "SoloFeature.h"
#include "streamFuns.h"
#include "TimeFunctions.h"
#include "SequenceFuns.h"
#include "Stats.h"
#include "GlobalVariables.h"

void SoloFeature::sumThreads()
{   
    //stats
    nReadsInput=g_statsAll.readN+1; //reserve 1 extra

    ///////////////////////////// collect RAchunk->RA->soloRead->readFeat            
    for (int ii=0; ii<P.runThreadN; ii++) {//point to
        readFeatAll[ii]= RAchunk[ii]->RA->soloRead->readFeat[pSolo.featureInd[featureType]];
        readFeatAll[ii]->streamReads->flush();
        readFeatSum->addCounts(*readFeatAll[ii]);        
    };       
    
    // if WL was not defined
    if (!pSolo.cbWLyes) {//now we can define WL and counts ??? we do not need to do it for every feature???
        pSolo.cbWLsize=readFeatSum->cbReadCountMap.size();
        pSolo.cbWL.resize(pSolo.cbWLsize);
        pSolo.cbWLstr.resize(pSolo.cbWLsize);
        uint64 ii=0;
        for (auto &cb : readFeatSum->cbReadCountMap) {
            pSolo.cbWL[ii] = cb.first;
            pSolo.cbWLstr[ii] = convertNuclInt64toString(pSolo.cbWL[ii],pSolo.cbL); 
            ii++;
        };
        readFeatSum->cbReadCount.resize(pSolo.cbWLsize);
        readBarSum->cbReadCountExact.resize(pSolo.cbWLsize);

        uint64 icb=0;
        for (auto ii=readFeatSum->cbReadCountMap.cbegin(); ii!=readFeatSum->cbReadCountMap.cend(); ++ii) {
            pSolo.cbWL[icb]=ii->first;
            readFeatSum->cbReadCount[icb]=ii->second;
            readBarSum->cbReadCountExact[icb]=ii->second;
            ++icb;
        };
    };

    // if restarting from _STARtmp/solo* file
    if (P.runRestart.type==1) {//this could happen if the run is restarted. Would be better to save/load cbReadCount, or recalculate it from
        for (int ii=0; ii<P.runThreadN; ii++) {
            readFeatAll[ii]->streamReads->clear(); //just in case EOF was reached in previous reading
            readFeatAll[ii]->streamReads->seekg(0,ios::beg);
            string line1;
            while (std::getline(*readFeatAll[ii]->streamReads, line1)) {
                istringstream line1stream(line1);
                uint64 cb1;            
                line1stream >> cb1 >> cb1 >> cb1;
                if (featureType==SoloFeatureTypes::SJ)
                    line1stream >> cb1;
                line1stream >> cb1;
                //if (cb1>readFeatSum->cbReadCount.size())
                //    continue;//this should not happen!
                readFeatSum->cbReadCount[cb1]++;
            };
        };
    };    
    
    //detected CBs
    nCB=0;nReadsMapped=0;
    for (uint32 ii=0; ii<pSolo.cbWLsize; ii++) {
        if (readFeatSum->cbReadCount[ii]>0) {
            nCB++;
            nReadsMapped += readFeatSum->cbReadCount[ii];
        };
    };
    
    indCBwl.resize(pSolo.cbWLsize, (uint32) -1);
    indCB.resize(nCB);
    nCB=0;//will count it again below
    for (uint32 ii=0; ii<pSolo.cbWLsize; ii++) {
        if (readFeatSum->cbReadCount[ii]>0) {
            indCB[nCB]=ii;
            indCBwl[ii]=nCB;
            ++nCB;
        };
    };
    
    //pseudocounts
    if (pSolo.CBmatchWL.mm1_multi_pc) {
        for (uint32 ii=0; ii<pSolo.cbWLsize; ii++) {
            readBarSum->cbReadCountExact[ii]++;//add one to exact counts
        };
    };
};