File: SoloReadFeature_inputRecords.cpp

package info (click to toggle)
rna-star 2.7.11b%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,492 kB
  • sloc: cpp: 21,951; awk: 827; ansic: 457; makefile: 192; sh: 31
file content (172 lines) | stat: -rwxr-xr-x 7,009 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#include <cmath>
#include "SoloReadFeature.h"
#include "SoloCommon.h"
#include "SoloFeature.h"
#include "soloInputFeatureUMI.h"
#include "serviceFuns.cpp"

void SoloReadFeature::inputRecords(uint32 **cbP, uint32 cbPstride, vector<uint32> &cbReadCountTotal, vector<readInfoStruct> &readInfo, SoloReadFlagClass &readFlagCounts,
                                   vector<uint32> &nReadPerCBunique1, vector<uint32> &nReadPerCBmulti1)
{   
    streamReads->flush();
    streamReads->seekg(0,std::ios::beg);

    //////////////////////////////////////////// standard features
    uint32 feature;
    uint64 umi, iread, prevIread=(uint64)-1;
    int32 cbmatch;
    int64 cb;
    vector<uint32> trIdDist;
    
    uint64 nReadsIn = 0;

    while (soloInputFeatureUMI(streamReads, featureType, readIndexYes, P.sjAll, iread, cbmatch, feature, umi, trIdDist, readFlagCounts)) {
        if (feature == (uint32)(-1) && !readIndexYes) {//no feature => no record, this can happen for SJs
            streamReads->ignore((uint32)-1, '\n');
            //stats.V[stats.noNoFeature]++; //need separate category for this
            continue;
        };

        bool readIsCounted = false;
        bool featGood = ( feature != (uint32)(-1) );
        bool noMMtoWLwithoutExact = false;
        bool noTooManyWLmatches = false;

        if (cbmatch<=1) {//single match
            *streamReads >> cb;

            if ( pSolo.CBmatchWL.oneExact && cbmatch==1 && cbReadCountTotal[cb]==0 ) {//single 1MM match, no exact matches to this CB
                noMMtoWLwithoutExact = true;

            } else {

                if (!pSolo.cbWLyes) {//if no-WL, the full cbInteger was recorded - now has to be placed in order
                    cb=binarySearchExact<uintCB>(cb, pSolo.cbWL.data(), pSolo.cbWLsize);
                    if (cb+1 == 0)
                        continue; //this cb was not in the tentative WL
                };

                //record feature
                if (featGood) {//good feature, will be counted
                    readIsCounted = true;

                    cbP[cb][0]=feature;
                    cbP[cb][1]=umi;

                    if (readIndexYes) {
                        cbP[cb][2]=iread;
                    };

                    cbP[cb]+=cbPstride;

                } else if (readInfoYes) {//no feature - record readInfo
                    readInfo[iread].cb=cb;
                    readInfo[iread].umi=umi;
                };
            };

        } else {//multiple matches
            #ifdef MATCH_CellRanger
            double ptot=0.0, pmax=0.0, pin;
            #else
            float ptot=0.0, pmax=0.0, pin;
            #endif

            for (uint32 ii=0; ii<(uint32)cbmatch; ii++) {
                uint32 cbin;
                char  qin;
                *streamReads >> cbin >> qin;
                if (cbReadCountTotal[cbin]>0) {//otherwise this cbin does not work
                    qin -= pSolo.QSbase;
                    qin = qin < pSolo.QSmax ? qin : pSolo.QSmax;
                    pin=cbReadCountTotal[cbin]*std::pow(10.0,-qin/10.0);
                    ptot+=pin;
                    if (pin>pmax) {
                        cb=cbin;
                        pmax=pin;
                    };
                };
            };
            if (ptot>0.0 && pmax>=pSolo.cbMinP*ptot) {
                //record feature single-number feature
                if (featGood) {
                    readIsCounted = true;
                    cbP[cb][0]=feature;
                    cbP[cb][1]=umi;
                    if (readIndexYes) {
                        cbP[cb][2]=iread;
                    };
                    cbP[cb]+=cbPstride;
                } else if (readInfoYes) {//no feature - record readInfo
                    readInfo[iread].cb=cb;
                    readInfo[iread].umi=umi;
                };
            } else {
                noTooManyWLmatches = true;
            };
        };

        if ( !readIndexYes || iread != prevIread ) {//no readindex (then only unique-gene reads are recorded) OR only for one align of each read, in case of multimappers
            prevIread = iread;
            if (featGood) {
                if (cbmatch==0) {
                    stats.V[stats.yessubWLmatchExact]++;
                } else if (noMMtoWLwithoutExact) {
                    stats.V[stats.noMMtoWLwithoutExact]++;
                } else if (noTooManyWLmatches) {
                    stats.V[stats.noTooManyWLmatches]++;
                };
            };

            if (readIsCounted) {
                if (feature<geneMultMark) {
                    nReadPerCBunique1[cb]++;
                } else {
                    nReadPerCBmulti1[cb]++;
                };
            };
            
            if ( pSolo.readStatsYes[featureType] ) {//has to be new iread to avoid muti-counting multi-gene reads
                
                //readIsCounted flag was defined above
                if ( readIsCounted ) {
                    if ( readFlagCounts.checkBit(readFlagCounts.featureU) )
                        readFlagCounts.setBit(readFlagCounts.countedU);
                    if ( readFlagCounts.checkBit(readFlagCounts.featureM) )
                        readFlagCounts.setBit(readFlagCounts.countedM);    
                };

                readFlagCounts.setBit(readFlag.cbMatch); //set this flag for both CB and no-CB, but no-CB will be counted separately below
                //only record this read in readFlagCounts if its CB was defined
                if (cbmatch==0) {//perfect CB match to WL
                    readFlagCounts.setBit(readFlagCounts.cbPerfect);
                    readFlagCounts.countsAdd(cb);
                } else if (cbmatch==1 && !noMMtoWLwithoutExact) {
                    readFlagCounts.setBit(readFlagCounts.cbMMunique);
                    readFlagCounts.countsAdd(cb);
                } else if (cbmatch>1 && !noTooManyWLmatches) {
                    readFlagCounts.setBit(readFlagCounts.cbMMmultiple);
                    readFlagCounts.countsAdd(cb);
                } else {//no CB match
                    readFlagCounts.countsAddNoCB();
                };

                // debug
                /*nReadsIn++;
                uint64 ntot=0;
                for (auto &ii: readFlagCounts.flagCounts)
                    ntot += ii.second[0];

                if (nReadsIn!=ntot)
                    cout << nReadsIn <<" "<< ntot << endl;
                */
                /*//if (featureType==SoloFeatureTypes::Gene && readFlagCounts.flagCounts[cb][readFlagCounts.featureM]+readFlagCounts.flagCounts[cb][readFlagCounts.featureU] != readFlagCounts.flagCounts[cb][readFlagCounts.exonic])
                //    cout << cb <<' '<< iread << endl;
                //if (readFlagCounts.checkBit(readFlagCounts.featureM))
                //    cout << iread <<' '<< readFlagCounts.flagCounts[cb][readFlagCounts.featureM] << endl;
                */
            };
        };
    };
};