File: ParametersSolo.h

package info (click to toggle)
rna-star 2.7.8a%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 3,076 kB
  • sloc: cpp: 20,429; awk: 483; ansic: 470; makefile: 181; sh: 31
file content (166 lines) | stat: -rwxr-xr-x 5,094 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
#ifndef CODE_ParametersSolo
#define CODE_ParametersSolo

#include <array>

#include "IncludeDefine.h"
#include "SoloBarcode.h"
#include "SoloFeatureTypes.h"

class Parameters;
class ParametersSolo;

// Bookkeeping for the UMI-deduplication types.
//
// Every type owns one fixed slot, given by typeI, in the per-type arrays
// typeNames, yes.B and countInd.I. The named members of `yes` and `countInd`
// are references that alias those slots, so yes.Exact and yes.B[Exact] are the
// same object.
//
// Because of those self-referencing members the two helper structs define
// their own copy operations: a compiler-generated copy would copy the
// references themselves, leaving the new object aliasing the arrays of the
// object it was copied from.
class UMIdedup {
public:
    const static uint32_t tN = 6; //number of dedup types = size of every per-type array below

    //type names, listed in the same order as the typeI values
    std::array<std::string,tN> typeNames { {"NoDedup", "Exact", "1MM_All", "1MM_Directional", "1MM_CR", "1MM_Directional_UMItools"} };

    //slot of each dedup type in the per-type arrays
    enum typeI : int32_t { NoDedup=0, Exact=1, All=2, Directional=3, CR=4, Directional_UMItools=5 };

    //per-type boolean flags
    struct YesFlags {
        uint32_t N = 0; //NOTE(review): presumably the number of flags that are set - confirm in initialize()
        std::array<bool,tN> B {};

        //named aliases for the elements of B, in typeI order
        bool &NoDedup = B[0];
        bool &Exact = B[1];
        bool &All = B[2];
        bool &Directional = B[3];
        bool &CR = B[4];
        bool &Directional_UMItools = B[5];

        YesFlags() = default;

        //copy the data only: the references are omitted from the initializer list,
        //so their default member initializers bind them to this object's own B
        YesFlags(const YesFlags &other) : N(other.N), B(other.B) {}

        YesFlags &operator=(const YesFlags &other) {
            N = other.N;
            B = other.B;
            return *this;
        }
    } yes;

    //per-type index in the countCellGennUMI
    struct CountIndices {
        //uint32_t N;
        std::array<uint32_t,tN> I {};

        //named aliases for the elements of I, in typeI order
        uint32_t &NoDedup = I[0];
        uint32_t &Exact = I[1];
        uint32_t &All = I[2];
        uint32_t &Directional = I[3];
        uint32_t &CR = I[4];
        uint32_t &Directional_UMItools = I[5];

        uint32_t main = 0; //index for SAM/stats/filtering output

        CountIndices() = default;

        //copy the data only, see YesFlags
        CountIndices(const CountIndices &other) : I(other.I), main(other.main) {}

        CountIndices &operator=(const CountIndices &other) {
            I = other.I;
            main = other.main;
            return *this;
        }
    } countInd;

    std::vector<std::string> typesIn; //UMIdedup types from user options
    std::vector<int32_t> types; //the above converted to typeI numbers
    int32_t typeMain = 0; //the type to be used in SAM/stats/filtering output - for now just types[0]

    //defined outside of this header; the elaborated type specifier refers to the
    //global class ParametersSolo and keeps this declaration self-contained
    void initialize(class ParametersSolo *pS);

protected:
    int it = 0;
};

class ParametersSolo {
public:
    Parameters *pP;
    
    //chemistry, library etc
    string typeStr;
    enum SoloTypes : int32 {None=0, CB_UMI_Simple=1, CB_UMI_Complex=2, CB_samTagOut=3, SmartSeq=4};
    SoloTypes type;
    bool yes;
    string strandStr;
    int32 strand;   
    
    uint32 barcodeRead, barcodeReadIn;//which read is the barcode read = 0,1,2?
    uint32 barcodeStart, barcodeEnd;//start/end of barcode sequence on barcodeRead
    bool barcodeReadSeparate;
    
    //simple barcodes
    uint32 cbS, cbL; //cell barcode start,length
    uint32 umiS, umiL; //umi start,length
    uint32 bL, cbumiL; //total barcode sequene length, CB+UMI length. Former does may not be equal to the latter

    vector<string> cbPositionStr;
    string umiPositionStr;
    
    //complex barcodes    
    vector<SoloBarcode> cbV;
    SoloBarcode umiV; //single UMI
    bool adapterYes; //anchor?  
    string adapterSeq; //anchor sequence
    uint32 adapterMismatchesNmax;//max number of mismatches in the anchor
    
    //input from SAM files
    vector<string> samAtrrBarcodeSeq, samAtrrBarcodeQual;
    
    //whitelist - general
    uint64 cbWLsize;
    bool cbWLyes;
    vector<string> soloCBwhitelist;
    vector <uint64> cbWL;    
    vector<string> cbWLstr;
    
    //features
    vector<string> featureIn;//string of requested features
    vector<uint32> features;
    uint32 nFeatures;//=features.size(), number of requested features
    
    array<bool,SoloFeatureTypes::N> featureYes; //which features are requested
    array<bool,SoloFeatureTypes::N> readInfoYes;//which features will readInfo (for now only Gene)
    array<int32,SoloFeatureTypes::N> featureInd;//index of each feature - skips unrequested features
    
    //filtering
    char QSbase,QSmax;//quality score base and cutoff

    #ifdef MATCH_CellRanger
    double cbMinP;//for CBs with non-exact matching to WL, min posterior probability
    #else
    float cbMinP;//for CBs with non-exact matching to WL, min posterior probability
    #endif
    
    //cell filtering
    struct {
        vector<string> type;
        uint32 topCells;
        
        struct {
            double nExpectedCells;
            double maxPercentile;
            double maxMinRatio;
        } knee;
        
        struct {
            uint32 indMin, indMax; //min/max cell index, sorted by UMI counts,for empty cells
            uint32 umiMin;
            double umiMinFracMedian;
            uint32 candMaxN;
            double FDR;
            uint32 simN;
        } eDcr;//EmptyDrops-CellRanger
        
    } cellFilter;
      
    //CB match
    struct {
        string type;
        bool mm1; //1 mismatch allowed
        bool mm1_multi; //1 mismatch, multiple matches to WL allowed
        bool oneExact; //CBs require at least one exact match
        bool mm1_multi_pc; //use psedocounts while calculating probabilities of multi-matches
        bool mm1_multi_Nbase; //allow multimatching to WL for CBs with N-bases
    } CBmatchWL;
    
    //UMIdedup
    UMIdedup umiDedup;
    
    //multi-gene umi
    struct {
        vector<string> type;
        bool MultiGeneUMI;
        bool MultiGeneUMI_CR;
    } umiFiltering;
    
    //clusters
    string clusterCBfile;
    
    //output
    vector<string> outFileNames;    
    struct {
    	string featuresGeneField3;
    } outFormat;

    bool samAttrYes;//post-processed SAM attributes: error-corrected CB and UMI
    int32 samAttrFeature;//which feature to use for error correction

    //processing
    uint32 redistrReadsNfiles; //numer of files to resditribute reads into
    
    //constants
    uint32 umiMaskLow, umiMaskHigh; //low/high half bit-mask or UMIs

    void initialize(Parameters *pPin);
    void umiSwapHalves(uint32 &umi);
    void complexWLstrings();
    void cellFiltering();
};
#endif