File: evaluation.hh

package info (click to toggle)
augustus 3.2.3%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 289,676 kB
  • sloc: cpp: 48,711; perl: 13,339; ansic: 1,251; makefile: 859; sh: 58
file content (156 lines) | stat: -rw-r--r-- 5,776 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
/***************************************************************************\
 * Filename : evaluation.hh
 * Author   : Mario Stanke
 * Email    : stanke@math.uni-goettingen.de
 *
 * Copyright: (C) 2002 by Mario Stanke
 *
 * Description: 
 *
 *
 * Date       |   Author      |  Changes
 *------------+---------------+---------------------------------
 * 19.06.2002 | Mario Stanke  | Creation of the file
 * 11.04.2007 | Mario Stanke  | Evaluation of the pred. of the transcription termination site (tts)
 \**************************************************************************/

#ifndef _EVALUATION_HH
#define _EVALUATION_HH

// project includes
#include "types.hh"
#include "gene.hh"
#include "extrinsicinfo.hh"

// standard C/C++ includes
#include <list>
#include <vector>


// Upper index of the TSS/TTS distance histograms: class Evaluation allocates
// tssDist and ttsDist with MAXUTRDIST+1 entries (indices 0..MAXUTRDIST).
#define MAXUTRDIST 5000
class Evaluation {
public:
  Evaluation(){
    nukTP = nukFP = nukFN = nukFPinside = 0;
    nucUTP = nucUFP = nucUFN = nucUFPinside = 0;
    exonTP = exonFP_partial = exonFP_overlapping = exonFP_wrong = 0;
    exonFN_partial = exonFN_overlapping = exonFN_wrong = 0;
    UTRexonTP = UTRexonFP = UTRexonFN = 0;
    UTRoffThresh = 20;
    geneTP = geneFN = 0;
    numPredExons = numAnnoExons = 0;
    numPredUTRExons = numAnnoUTRExons = 0;
    numUniquePredExons = numUniqueAnnoExons = 0;
    numUniquePredUTRExons = numUniqueAnnoUTRExons = 0;
    numPredGenes = numAnnoGenes = 0;
    numDataSets = 0;
    longestPredIntronLen = 0;
    tssDist = new int[MAXUTRDIST+1];
    for (int i=0; i<= MAXUTRDIST; i++)
      tssDist[i] = 0;
    numTotalPredTSS = numTSS = 0;
    ttsDist = new int[MAXUTRDIST+1];
    for (int i=0; i<= MAXUTRDIST; i++)
      ttsDist[i] = 0;
    numTotalPredTTS = numTTS = 0;
    leftFlankEnd = rightFlankBegin = -1;
  }
  ~Evaluation(){
      if (tssDist)
	  delete [] tssDist;
      if (ttsDist)
	  delete [] ttsDist;
  };

  void addToEvaluation(Transcript* prediction, Transcript *database, Strand strand, Double quotient = -1.0);
  void addToEvaluation(Transcript* predictedGeneList, Transcript* annotatedGeneList);
  void finishEvaluation();
  void print();
  void printQuotients();
private:
  /*
   * Quick evaluation is fast but requires that both gene lists 
   *
   */
  void evaluateQuickOnNucleotideLevel(State* const predictedExon, int curPredBegin, 
				      State* const annotatedExon, int curAnnoBegin);
  void evaluateQuickOnExonLevel(State* predictedExon, State* annotatedExon);
  void evaluateQuickOnGeneLevel(Transcript* const predictedGeneList, Transcript* const annotatedGeneList);

  void evaluateOnNucleotideLevel(list<State> *predictedExon, list<State> *annotatedExon, bool UTR=false);    
  void evaluateOnExonLevel(list<State> *predictedExon, list<State> *annotatedExon, bool UTR=false);
  void evaluateOnGeneLevel(Transcript* const predictedGeneList, Transcript* const annotatedGeneList);
  void evaluateOnUTRLevel(Transcript* const predictedGeneList, Transcript* const annotatedGeneList);
public:
  // nucleotide level
  int nukTP, nukFP, nukFN,
      nukFPinside; // false positive coding base inside gene area (as opposed to in flanking regions)
  int nucUTP, nucUFP, nucUFN, // UTR bases
    nucUFPinside; // false positive noncoding base inside gene area (as opposed to in flanking regions)
  double nukSens, nukSpec;         // coding base sensitivity and specifity
  double nucUSens, nucUSpec;       // non-coding base sensitivity and specifity
  double exonSens, exonSpec;       // coding exon sensitivity and specificity
  double UTRexonSens, UTRexonSpec; // exon sensitivity and specificity
  double geneSens, geneSpec;       // gene sensitivity and specifity
private:
  //TP = true positive, FP = false positive, FN = false negative
    int  leftFlankEnd, rightFlankBegin;
    list<Double> quotients;
    int longestPredIntronLen;

  // exon level
  int numPredExons, numAnnoExons;
  int numUniquePredExons, numUniqueAnnoExons;
  
  int exonTP, 
    exonFP, 
    exonFP_partial,        // predicted exon unequal to but included in an annotated exon
    exonFP_overlapping,
    exonFP_wrong,
    exonFN,
    exonFN_partial,        // annotated exon  unequal to but included in a predicted exon
    exonFN_overlapping,
    exonFN_wrong;

  // gene level
  int geneTP, geneFP, geneFN;
  int numPredGenes, numAnnoGenes;

  // UTR level
  int *tssDist; // array that holds for each distance the number of predicted TSS that is off by this distance
  int numTSS;   // number of gene pairs (anno, pred) with identical translation start and where both have an annotated TSS
  int numTotalPredTSS;
  double meanTssDist;
  int  medianTssDist;
  int *ttsDist; // array that holds for each distance the number of predicted TTS that is off by this distance
  int numTTS;   // number of gene pairs (anno, pred) with identical stop codon and where both have an annotated TTS
  int numTotalPredTTS;
  double meanTtsDist;
  int  medianTtsDist;
  int numPredUTRExons, numAnnoUTRExons;
  int numUniquePredUTRExons, numUniqueAnnoUTRExons;
  int UTRexonTP, UTRexonFP, UTRexonFN;
  int UTRoffThresh; // count UTR exon as correct, if one end is exact and the other end at most this many bp off
  /*
   * data members for the "Burge-Karlin"-Method computing first the 
   * specifity and sensitivity for each sequence and then taking their
   * means afterwards
   */

  int numDataSets;
  // nukleotide level
  int nukTPBK, nukFPBK, nukFPBKinside, nukFNBK;
           
  // exon level
  int exonTPBK, exonFPBK, exonFNBK;
};

/*
 * predictAndEvaluate
 *
 * Predict genes on the given set of annotated sequences given the current parameters.
 * Then evaluate the accuracy against the given annotation.
 *
 * trainGeneList     : the annotated sequences; the vector is taken by value, so the
 *                     vector of pointers (not the AnnoSequence objects) is copied per call
 * extrinsicFeatures : taken by non-const reference
 * returns           : pointer to an Evaluation
 *                     NOTE(review): presumably allocated with new and to be deleted
 *                     by the caller - confirm against the implementation
 */
Evaluation* predictAndEvaluate(vector<AnnoSequence*> trainGeneList, FeatureCollection &extrinsicFeatures);

#endif // _EVALUATION_HH