File: speciesgraph.hh

package info (click to toggle)
augustus 3.3.2%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 486,188 kB
  • sloc: cpp: 51,969; perl: 20,926; ansic: 1,251; makefile: 935; python: 120; sh: 118
file content (204 lines) | stat: -rw-r--r-- 8,374 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
/**********************************************************************
 * file:    speciesgraph.hh
 * licence: Artistic Licence, see file LICENCE.TXT or 
 *          http://www.opensource.org/licenses/artistic-license.php
 * descr.:  builds a directed acyclic graph from a set of sampled genes.
 *          The underlying auxiliary structure of the graph consists of seven
 *          neutral lines each representing a type of non-coding segment
 *          In comparative gene prediction for each species an object of
 *          this class is created.
 * authors: Stefanie König
 *
 *********************************************************************/


#ifndef _SPECIESGRAPH
#define _SPECIESGRAPH

#include<unordered_map>

//project includes
#include "graph.hh"

//forward declarations
class Move;

class SpeciesGraph : public AugustusGraph {

private:
    AnnoSequence *seqRange;
    list<ExonCandidate*> *additionalExons; //exons, which are not sampled
    string speciesname;
    Strand strand;
    bool genesWithoutUTRs;
    bool onlyCompleteGenes;
    double ec_score; //temp: until there are real scores for exon candidates
    ofstream *sampled_GFs;        // output file of sampled exons/introns
    bool overlapComp;


public:
    SpeciesGraph(list<Status> *states, AnnoSequence *seq, list<ExonCandidate*> *addEx, string name, Strand s, bool u, bool o, ofstream *gf, bool ov = false) :
	AugustusGraph(states, seq->sequence),
	seqRange(seq),
	additionalExons(addEx),
	speciesname(name),
	strand(s),
	genesWithoutUTRs(u),
	onlyCompleteGenes(o),
	sampled_GFs(gf),
	overlapComp(ov)
    {
	try {
	    ec_score = Properties::getdoubleProperty("/CompPred/ec_score");
	} catch (...) {
	    ec_score = alpha_e * x0_e - r_be;
	}
    }
    ~SpeciesGraph(){
	// AnnoSequence must be deleted by caller (it is needed at other places)
    }
    using AugustusGraph::getKey;

    /*
     * functions to build graph
     */
    
    void buildGraph(double meanIntrLen=numeric_limits<double>::max());
    /*
     * @getKey(): the key of auxilary nodes is 'PosBegin:n_type'
     * the key of nodes representiong Exons is 'PosBegin:PosEnd:StateType'
     */
    string getKey(Node *n);
    void printGraph(string filename); // prints graph in dot-format
    void printSampledGF(Status *st, double score=0); //prints sampled exons/introns to display them with gBrowse
    void printGF(ExonCandidate *ec, double score=0.0, float avgBaseProb=0.0);
    string getSpeciesname() const {return speciesname;}
    char* getSequence() const {return seqRange->sequence;}
    char* getSeqID() const {return seqRange->seqname;}
    int getSeqOffset() const {return seqRange->offset;}
    int getSeqLength() const {return seqRange->length;}
    Strand getSeqStrand() const {return strand;}
    AnnoSequence* getAnnoSeq() const {return seqRange;}
    void printCurrentPath();
    /*
     * maximum weight path problem related functions
     */

    void topSort();         // sorts nodelist of graph topologically
    void dfs(Node* node);   // depth first search, subroutine of topSort()                                
    double relax();         // relaxation of all nodes

    // static functions
    static void setECThold(double t){ec_thold=t;}
    static void setICThold(double t){ic_thold=t;}

    /*
     * parameters that define the scoring function of exons and introns
     * score of exons: ec_thold + a_0x_0 + a_1x_1 + ... + a_nx_n,
     * where x_i are derived from the following set of features: posterior prob, average base prob, log length
     * diversity, omega, variance of omega, conseration, containment, #species in a HECT
     * and the a_i are parameters trained by logistic regression
     */
    static double ec_thold; // ec_thold + a_0*x_0 + a_1*x_1 + ... + a_n*x_n 
    static double ic_thold; // ic_thold + a_0*x_0 + a_1*x_1 + ... + a_n*x_n 
    static double maxCostOfExonLoss;

    /*
     * old code
     */
    /*double localChange(Move *move);                 // local path search with calculation of the score difference between new and old local path
    double getScorePath(Node *begin, Node *end);          // calc. the sum of edge weights of the path: begin ~~> end
    Node* getTopSortPred(Node *node);    //get next node in topological order which is on the path
    Node* getTopSortNext(Node *node);    //get previous node in topological order which is on the path
    Node* getNextExonOnPath(Node *node, size_t step);  // returns the i-th next exon on the current path, where i is the step size (size_t step)
    Node* getPredExonOnPath(Node *node, size_t step);  // returns the i-th preceding exon on the current path
    void printNode(Node *node); //print Node with offset
    */

private:
    /*
     * subroutines of buildGraph()
     */
    template<class T> Node* addExon(T *exon, vector< vector<Node*> > &neutralLines, unordered_map<int32_t,Node*> &auxiliaryNodes); // add an exon to the graph
    void addIntron(Node* pred, Node* succ, Status *intr); // add an intron to the graph
    Node* addLeftSS(Status *exon, vector< vector<Node*> > &neutralLines, unordered_map<int32_t,Node*> &auxiliaryNodes);
    Node* addRightSS(Status *exon, vector< vector<Node*> > &neutralLines, unordered_map<int32_t,Node*> &auxiliaryNodes);
    Node* addAuxilaryNode(NodeType type, int pos, vector< vector<Node*> > &neutralLines, unordered_map<int32_t,Node*> &auxiliaryNodes); // add an auxilary node to the graph if it does not exist already
    Node* getAuxilaryNode(NodeType type, int pos, unordered_map<int32_t,Node*> &auxiliaryNodes) const; // get an auxilary node (returns NULL if node does not exists)
    void addAuxilaryEdge(Node *pred, Node *succ); // add an auxilary edge to the graph
    Node* addAuxNodeToLine(NodeType type, int pos, vector< vector<Node*> >&neutralLines); // add an auxilary node to one of the neutral lines if it does not exist already
    NodeType getPredType(StateType type, int begin, int end) ;  // get the type of gene feature that precedes an exon
    NodeType getSuccType(StateType type) ;    // get the type of gene feature that succeeds an exon 
    list<NodeType> getPredTypes(Node *node) ;  // same as above, but includes stop codon types 
    list<NodeType> getSuccTypes(Node *node) ;    // same as above, but includes stop codon types
    bool isGeneStart(Node *exon);
    bool isGeneEnd(Node *exon);
    Node* addNode(Status *exon); // add a sampled CDS or UTR exon
    Node* addNode(ExonCandidate *exon); // add an additional candidate exon
    Node* addNode(NodeType type, int pos); // add an auxilary node
    /*
     * subroutines of printGraph()
     */
    string getDotNodeAttributes(Node *node);
    string getDotEdgeAttributes(Node *pred, Edge *edge);
};

/*
 * old code: optimize cgp by making small moves
 */
/*struct MoveNode{

    Node *node;
    double weight;

    MoveNode(Node* n, double w) : node(n), weight(w) {}
    ~MoveNode() {}
};

struct MoveEdge{

    Edge *edge;
    double weight;

    MoveEdge(Edge* e, double w) : edge(e), weight(w) {}
    ~MoveEdge() {}
};

class Move{

private:
    SpeciesGraph *graph;
    size_t step_size;
    list<MoveNode> nodes;  // sorted list of nodes
    list<MoveEdge> edges;  // sorted list of edges
    Node* local_head;
    Node* local_tail;

public:
    Move(SpeciesGraph *g, size_t s = 1) :
	graph(g),
	step_size(s),
	local_head(NULL),
	local_tail(NULL)
    {}
    ~Move() {}

    Node* getHead() const {return local_head;}
    Node* getTail() const {return local_tail;}
    void addNodeBack(Node* node, double weight){nodes.push_back(MoveNode(node, weight));}
    void addEdgeBack(Edge* edge, double weight){edges.push_back(MoveEdge(edge, weight));}
    void addNodeFront(Node* node, double weight){nodes.push_front(MoveNode(node, weight));}
    void addEdgeFront(Edge* edge, double weight){edges.push_front(MoveEdge(edge, weight));}
    Node* getNodeBack() const {return nodes.back().node;}
    Node* getNodeFront() const {return nodes.front().node;}
    bool nodesIsEmpty() const {return nodes.empty();}
    void shiftHead(size_t step = 1){local_head = graph->getPredExonOnPath(local_head,step);}
    void shiftTail(size_t step = 1){local_tail = graph->getNextExonOnPath(local_tail,step);}
    void addWeights();
    void undoAddWeights();
    void initLocalHeadandTail();  //determines local_head and local_tail
};
*/
#endif  // _SPECIESGRAPH