File: exoncand.hh

package info (click to toggle)
augustus 3.3.2%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 486,188 kB
  • sloc: cpp: 51,969; perl: 20,926; ansic: 1,251; makefile: 935; python: 120; sh: 118
file content (115 lines) | stat: -rw-r--r-- 4,285 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
/*****************************************************************************\
 * Filename : exoncand.hh
 * Author   : Alexander Gebauer
 *
 * Description: Generation of exon candidates (=possible exons)
 *
 * An exon candidate is a sequence interval with frame and strand information
 * such that
 * at the 5' end there is either an ASS (acceptor splice site) or a start codon and
 * at the 3' end there is either a DSS (donor splice site) or a stop codon.
 *
 * Date       |   Author              |  Changes
 *------------|-----------------------|------------------------------------------
 * 3.11.2011  | Mario Stanke          | creation of the file
 * 06.12.2011 | Alexander Gebauer     | definition of the stop codons
 * 17.01.2012 | Alexander Gebauer     | add class AlignSeq and struct block
 * 27.02.2012 | Alexander Gebauer     | add class ExonCandidate
 * 13.01.2011 | Mario Stanke          | revision: general set of stop codons
\******************************************************************************/

#ifndef _EXONCAND_HH
#define _EXONCAND_HH

#include <stdint.h>

#include "types.hh"
#include "exonmodel.hh" // for OpenReadingFrame
#define EXON_TYPES 17

/*
 * Exon types, grouped by strand.
 * The 16 non-negative enumerators are numbered consecutively from 0;
 * together with UNKNOWN_EXON (-1) they make up the EXON_TYPES = 17 kinds.
 * Tables declared with EXON_TYPES-1 entries therefore have one slot per
 * non-negative enumerator.
 */
enum ExonType {
    UNKNOWN_EXON = -1,

    // forward strand (values 0..7)
    singleGene,
    initial_0,
    initial_1,
    initial_2,
    internal_0,
    internal_1,
    internal_2,
    terminal_exon,

    // reverse strand (values 8..15)
    rsingleGene,
    rinitial_exon,
    rinternal_0,
    rinternal_1,
    rinternal_2,
    rterminal_0,
    rterminal_1,
    rterminal_2
};

// Predicate on an exon type; ExonCandidate::frame(int) uses it to pick between
// its two frame formulas.
// NOTE(review): presumably true exactly for the forward-strand types
// (singleGene .. terminal_exon) -- the definition is not in this header, confirm.
bool isPlusExon(ExonType t);

// Lookup tables with EXON_TYPES-1 (= 16) entries, i.e. one entry per
// non-negative ExonType value; UNKNOWN_EXON (-1) has no entry.
// exonTypeReadingFrames is indexed by ExonType in ExonCandidate::frame().
// NOTE(review): stateExonTypeIdentifiers is presumably indexed the same way and
// holds the names accepted by toExonType() -- definitions are not visible here.
extern const int exonTypeReadingFrames[EXON_TYPES-1];
extern const char* stateExonTypeIdentifiers[EXON_TYPES-1];

// Converts a state type identifier string to the corresponding ExonType.
// NOTE(review): the result for an unrecognised string is not visible from this
// header (presumably UNKNOWN_EXON) -- confirm against the definition.
ExonType toExonType(const char* str);


class ExonCandidate {
public:
    ExonCandidate(ExonType s=UNKNOWN_EXON, long int b=0, long int e=0, double sc=0.0, Double up_sc=1.0, Double down_sc=1.0):
        type(s),
        begin(b),
        end(e),
        score(sc),
        upScore(up_sc),
        downScore(down_sc)
    {}
    ExonCandidate(ExonCandidate* other){
        begin = other->begin;
        end = other->end;
        type = other->type;
        score = other->score;
        upScore = other->upScore;
        downScore = other->downScore;
    }
    ~ExonCandidate(){}
    ExonType type;
    int begin, end;
    double score;

    int getStart();
    int getEnd();
    int frame() const { return exonTypeReadingFrames[type]; }  // frame of the exon
    int frame(int p) const { return isPlusExon(type) ?         // frame at position p within the exon
	    mod3(frame() - (end + 1) + p) :
	    mod3(frame() +  end + 1  - p);
    }
    Double getUpScore() const {return upScore;}
    Double getDownScore() const {return downScore;}
    double getScore() const {return score;}
    void setUpScore(Double s) {upScore = s;}
    void setDownScore(Double s) {downScore = s;}
    void setScore(double s) {score = s;}

    int getFirstCodingBase();
    int getLastCodingBase();
    int gff3Frame();
    int len() {return end-begin+1;}
    ExonType getExonType();
    int complementType();
    StateType getStateType();
    string key();
    int_fast64_t getKey(); // keys encodes all of: chrStart chrEnd type lenMod3
    bool correctType(const char* dna, int dnalen); // verify ExonType on sequence
    friend ostream& operator<<(ostream& strm, const ExonCandidate &ec);
private:
    Double upScore, downScore;
};

/*
 * Finds exon candidates on the sequence dna and records them in the maps
 * ecs and addECs, which are passed by reference and keyed by int_fast64_t.
 * NOTE(review): the keys are presumably ExonCandidate::getKey() values, minLen
 * presumably the minimal candidate length, and the split between ecs and addECs
 * is not visible from this header -- confirm against the definition.
 *
 * assqthresh, dssqthresh are between 0 and 1 and thresholds for the inclusion of
 * acceptor/donor splice sites based on the pattern probability.
 * assqthresh=0.05 means that only those acceptor splice sites are considered
 * whose pattern is such that 5% of true splice site patterns have lower probability.
 * A threshold of 0 means that all splice site patterns are considered.
 * Note that the defaults declared here are not 0:
 * assmotifqthresh=0.15, assqthresh=0.3, dssqthresh=0.7.
 */

void findExonCands(map<int_fast64_t, ExonCandidate*> &ecs, map<int_fast64_t, ExonCandidate*> &addECs, const char *dna, int minLen=1, double assmotifqthresh=0.15, double assqthresh=0.3, double dssqthresh=0.7);

// Computes the score for the splice sites of an exon candidate from exonScore,
// minProb and maxProb.
// NOTE(review): presumably minProb/maxProb bound the range exonScore is
// normalised against -- the formula is not visible in this header, confirm.
Double computeSpliceSiteScore(Double exonScore, Double minProb, Double maxProb); 

// Creates a new exon candidate (EC) from a key encoding all of:
// chrStart chrEnd type lenMod3.
// The candidate is verified against the sequence dna of length dnalen
// (type, no in-frame stop codon, etc.).
// NOTE(review): the result on failed verification (presumably a null pointer)
// and ownership of the returned object (presumably the caller's) are not
// visible from this header -- confirm against the definition.
ExonCandidate* create(int_fast64_t key, const char* dna, int dnalen); 

#endif  //  _EXONCAND_HH