File: SoloFeature.h

package info (click to toggle)
rna-star 2.7.8a%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 3,076 kB
  • sloc: cpp: 20,429; awk: 483; ansic: 470; makefile: 181; sh: 31
file content (94 lines) | stat: -rwxr-xr-x 3,539 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#ifndef H_SoloFeature
#define H_SoloFeature

#include <fstream>
#include <unordered_map>
#include <unordered_set>

#include "IncludeDefine.h"
#include "ReadAlignChunk.h"
#include "Transcriptome.h"

#include "SoloCommon.h"
#include "SoloRead.h"
#include "ReadAlignChunk.h"

#include "SoloFilteredCells.h"

// SoloFeature: state and processing entry points for one "solo" feature type.
// This header only declares the class; every member function is defined elsewhere,
// so the notes below restate the existing member comments or are explicitly hedged.
// NOTE(review): the class holds reference members and a const member, so they must be
// bound in the constructor's initializer list and the implicit copy-assignment is unavailable.
class SoloFeature {
private:
    Parameters &P;               //reference member, bound at construction (presumably to the constructor's Pin - confirm)
    ReadAlignChunk **RAchunk;    //pointer to pointer(s) to ReadAlignChunk; ownership is not visible from this header
    Transcriptome &Trans;        //reference member, bound at construction (presumably to the constructor's inTrans - confirm)
    
    const int32 featureType;     //fixed for the lifetime of the object (presumably the constructor's feTy - confirm)
    SoloFeature **soloFeatAll;   //pointer to pointer(s) to SoloFeature; presumably the collection of all feature objects - confirm
    
    static const uint32 umiArrayStride=3; //class-wide constant; presumably the number of uint32 per record in the umiArray arguments below - confirm
    enum {rguG, rguU, rguR};     //unnamed enum with values 0,1,2; presumably field offsets inside an rGeneUMI record - confirm in the .cpp
    uint32 rguStride;            //presumably the number of uint32 per rGeneUMI record - confirm
    
public:
    ParametersSolo &pSolo;       //reference member, bound at construction

    SoloReadFeature *readFeatSum, **readFeatAll; //presumably the summed and the individual (e.g. per-thread) read-feature objects - confirm
    SoloReadBarcode *readBarSum; //presumably the constructor's readBarSumIn - confirm

    uint64 nReadsMapped, nReadsInput; //total number of mapped reads; nReadsInput presumably is the total number of input reads - confirm
    uint32 nCB;                  //presumably the number of detected cell barcodes (CBs) - confirm
    uint32 featuresNumber; //number of features (i.e. genes, SJs, etc)

    uint32 *rGeneUMI;//mapped reads sorted by CB
    uint32 *rCBn;//number of reads for detected CBs in the whitelist
    uint32 **rCBp;//array of pointers to each CB sub-array

    vector<uint32> indCB;//index of detected CBs in the whitelist
    vector<uint32> indCBwl; //reverse of indCB: index of whitelist (WL) CBs in the detected CB list
    vector<uint32> nUMIperCB, nUMIperCBsorted;//number of UMIs per CB, and the same sorted (descending)
    vector<uint32> nGenePerCB;//number of genes (with >0 UMIs) per CB
    vector<uint32> nReadPerCB;//number of reads per CB
    
    vector<uint32> countCellGeneUMI;//sparsified matrix for the counts, each entry is: geneID count1 count2 ... countNcounts
    vector<uint32> countCellGeneUMIindex;//index of CBs in the count matrix
    uint32 countMatStride; //number of counts per entry in the count matrix
    
    vector<unordered_map<uint32, unordered_set<uint64>>> cbFeatureUMImap; //for SmartSeq counting; one map (uint32 key -> set of uint64) per vector element
       
    string outputPrefix, outputPrefixFiltered; //presumably output path prefixes for the raw and filtered results - confirm
    
    SoloFilteredCells filteredCells;
    
    array<vector<uint64>,2> sjAll; //two vectors of uint64; presumably splice-junction data - confirm what each of the two holds
    
    vector<readInfoStruct> readInfo; //corrected CB/UMI information for each read
    
    vector<uint32> redistrFilesCBindex, redistrFilesCBfirst; //redistr file for each CB, CB boundaries in redistributed files
    vector<uint64> redistrFilesNreads; //number of reads in each file
    vector <fstream*> redistrFilesStreams; //raw fstream pointers; who opens/closes/deletes them is not visible from this header

    //constructor: the two references, the const featureType and the pSolo reference have to be set in its initializer list
    SoloFeature(Parameters &Pin, ReadAlignChunk **RAchunk, Transcriptome &inTrans, int32 feTy, SoloReadBarcode *readBarSumIn, SoloFeature **soloFeatAll);
    //processing/counting steps; the order in which they are called is not visible from this header
    void processRecords();
    void sumThreads();
    void countSmartSeq();
    void countCBgeneUMI();
    void countVelocyto();
    void quantTranscript();
    
    //UMI collapsing for one CB: iCB is the CB index, umiArray is a caller-provided uint32 buffer
    //(presumably laid out with umiArrayStride - confirm)
    void collapseUMI(uint32 iCB, uint32 *umiArray);
    void collapseUMI_CR(uint32 iCB, uint32 *umiArray);
    void collapseUMIall(uint32 iCB, uint32 *umiArray);
    //three UMI-correction variants with parallel signatures; umiCorr is a non-const reference, so it can be filled/modified by the callee;
    //the Directional variant takes one extra argument, dirCountAdd. The meaning of the uint32 return value is not visible here.
    uint32 umiArrayCorrect_CR         (const uint32 nU0, uintUMI *umiArr, const bool readInfoRec, const bool nUMIyes, unordered_map <uintUMI,uintUMI> &umiCorr);
    uint32 umiArrayCorrect_Directional(const uint32 nU0, uintUMI *umiArr, const bool readInfoRec, const bool nUMIyes, unordered_map <uintUMI,uintUMI> &umiCorr, const int32 dirCountAdd);
    uint32 umiArrayCorrect_Graph      (const uint32 nU0, uintUMI *umiArr, const bool readInfoRec, const bool nUMIyes, unordered_map <uintUMI,uintUMI> &umiCorr);

    //output and statistics
    void outputResults(bool cellFilterYes, string outputPrefixMat);
    //bam0 and size0 are passed by non-const reference, so the callee can replace the caller's pointer and size
    void addBAMtags(char *&bam0, uint32 &size0, char* bam1);
    void statsOutput();
    void redistributeReadsByCB();
    
    //cell filtering
    void cellFiltering();
    void emptyDrops_CR();
    void loadRawMatrix();
};

#endif