Augustus 3.4.0
Loading...
Searching...
No Matches
evaluation.hh
1/*
2 * evaluation.hh
3 *
4 * License: Artistic License, see file LICENSE.TXT or
5 * https://opensource.org/licenses/artistic-license-1.0
6 */
7
8#ifndef _EVALUATION_HH
9#define _EVALUATION_HH
10
11// project includes
12#include "types.hh"
13#include "gene.hh"
14#include "extrinsicinfo.hh"
15
16// standard C/C++ includes
17#include <list>
18#include <vector>
19
20
21#define MAXUTRDIST 5000
22
29public:
30 Evaluation(){
31 nukTP = nukFP = nukFN = nukFPinside = 0;
32 nucUTP = nucUFP = nucUFN = nucUFPinside = 0;
33 exonTP = exonFP_partial = exonFP_overlapping = exonFP_wrong = 0;
34 exonFN_partial = exonFN_overlapping = exonFN_wrong = 0;
35 UTRexonTP = UTRexonFP = UTRexonFN = 0;
36 UTRoffThresh = 20;
37 geneTP = geneFN = 0;
38 numPredExons = numAnnoExons = 0;
39 numPredUTRExons = numAnnoUTRExons = 0;
40 numUniquePredExons = numUniqueAnnoExons = 0;
41 numUniquePredUTRExons = numUniqueAnnoUTRExons = 0;
42 numPredGenes = numAnnoGenes = 0;
43 numDataSets = 0;
44 longestPredIntronLen = 0;
45 tssDist = new int[MAXUTRDIST+1];
46 for (int i=0; i<= MAXUTRDIST; i++)
47 tssDist[i] = 0;
48 numTotalPredTSS = numTSS = 0;
49 ttsDist = new int[MAXUTRDIST+1];
50 for (int i=0; i<= MAXUTRDIST; i++)
51 ttsDist[i] = 0;
52 numTotalPredTTS = numTTS = 0;
53 leftFlankEnd = rightFlankBegin = -1;
54 }
56 if (tssDist)
57 delete [] tssDist;
58 if (ttsDist)
59 delete [] ttsDist;
60 };
61
// Public evaluation interface. Only the declarations are visible in this header;
// the behavior notes below are inferred from names and signatures -- TODO confirm
// against the implementation.
//
// Overload for one prediction / one annotation ("database") transcript on the given
// strand. `quotient` is optional and defaults to -1.0.
62 void addToEvaluation(Transcript* prediction, Transcript *database, Strand strand, Double quotient = -1.0);
// Overload taking a predicted and an annotated gene list, without strand or quotient.
63 void addToEvaluation(Transcript* predictedGeneList, Transcript* annotatedGeneList);
// Presumably computes the derived statistics once all data has been added -- confirm.
64 void finishEvaluation();
// Presumably outputs the evaluation results; the output destination is not visible here.
65 void print();
// Presumably outputs the collected `quotients` list -- confirm.
66 void printQuotients();
67private:
68 /*
69 * Quick evaluation is fast but requires that both gene lists
70 * ... NOTE(review): the original sentence breaks off here; the precondition
 * on the two gene lists is not stated in this header -- TODO confirm against
 * the implementation and complete this comment.
 *
 * The "quick" variants take raw State / Transcript pointers.
71 */
72 void evaluateQuickOnNucleotideLevel(State* const predictedExon, int curPredBegin,
73 State* const annotatedExon, int curAnnoBegin);
74 void evaluateQuickOnExonLevel(State* predictedExon, State* annotatedExon);
75 void evaluateQuickOnGeneLevel(Transcript* const predictedGeneList, Transcript* const annotatedGeneList);
76
// General (non-"quick") evaluation helpers. The nucleotide- and exon-level
// variants take list<State> pointers and an optional UTR flag (default false);
// the gene- and UTR-level variants take the predicted and annotated gene lists.
77 void evaluateOnNucleotideLevel(list<State> *predictedExon, list<State> *annotatedExon, bool UTR=false);
78 void evaluateOnExonLevel(list<State> *predictedExon, list<State> *annotatedExon, bool UTR=false);
79 void evaluateOnGeneLevel(Transcript* const predictedGeneList, Transcript* const annotatedGeneList);
80 void evaluateOnUTRLevel(Transcript* const predictedGeneList, Transcript* const annotatedGeneList);
81public:
82 // nucleotide level (TP = true positive, FP = false positive, FN = false negative)
83 int nukTP, nukFP, nukFN,
84 nukFPinside; // false positive coding base inside gene area (as opposed to in flanking regions)
85 int nucUTP, nucUFP, nucUFN, // UTR bases
86 nucUFPinside; // false positive noncoding base inside gene area (as opposed to in flanking regions)
87 double nukSens, nukSpec; // coding base sensitivity and specificity
88 double nucUSens, nucUSpec; // non-coding base sensitivity and specificity
89 double exonSens, exonSpec; // coding exon sensitivity and specificity
90 double UTRexonSens, UTRexonSpec; // UTR exon sensitivity and specificity
91 double geneSens, geneSpec; // gene sensitivity and specificity
92private:
93 //TP = true positive, FP = false positive, FN = false negative
94 int leftFlankEnd, rightFlankBegin;
95 list<Double> quotients;
96 int longestPredIntronLen;
97
98 // exon level
99 int numPredExons, numAnnoExons;
100 int numUniquePredExons, numUniqueAnnoExons;
101
102 int exonTP,
103 exonFP,
104 exonFP_partial, // predicted exon unequal to but included in an annotated exon
105 exonFP_overlapping,
106 exonFP_wrong,
107 exonFN,
108 exonFN_partial, // annotated exon unequal to but included in a predicted exon
109 exonFN_overlapping,
110 exonFN_wrong;
111
112 // gene level
113 int geneTP, geneFP, geneFN;
114 int numPredGenes, numAnnoGenes;
115
116 // UTR level
117 int *tssDist; // array that holds for each distance the number of predicted TSS that is off by this distance
118 int numTSS; // number of gene pairs (anno, pred) with identical translation start and where both have an annotated TSS
119 int numTotalPredTSS;
120 double meanTssDist;
121 int medianTssDist;
122 int *ttsDist; // array that holds for each distance the number of predicted TTS that is off by this distance
123 int numTTS; // number of gene pairs (anno, pred) with identical stop codon and where both have an annotated TTS
124 int numTotalPredTTS;
125 double meanTtsDist;
126 int medianTtsDist;
127 int numPredUTRExons, numAnnoUTRExons;
128 int numUniquePredUTRExons, numUniqueAnnoUTRExons;
129 int UTRexonTP, UTRexonFP, UTRexonFN;
130 int UTRoffThresh; // count UTR exon as correct, if one end is exact and the other end at most this many bp off
131 /*
132 * Data members for the "Burge-Karlin" method, which first computes the
133 * specificity and sensitivity for each sequence and then takes their
134 * means afterwards.
135 */
136
137 int numDataSets;
138 // nucleotide level
139 int nukTPBK, nukFPBK, nukFPBKinside, nukFNBK;
140
141 // exon level
142 int exonTPBK, exonFPBK, exonFNBK;
143};
144
145/*
146 * predictAndEvaluate
147 *
148 * Predict genes on the given set of annotated sequences given the current parameters.
149 * Then evaluate the accuracy against the given annotation.
 *
 * trainGeneList      the annotated sequences; the vector of pointers is passed by value
 * extrinsicFeatures  passed by non-const reference; whether it is modified is not
 *                    visible in this header
 *
 * Returns a pointer to an Evaluation object.
 * NOTE(review): ownership of the returned object is not stated here -- presumably
 * the caller is responsible for deleting it; confirm against the implementation.
150 */
151Evaluation* predictAndEvaluate(vector<AnnoSequence*> trainGeneList, FeatureCollection &extrinsicFeatures);
152
153#endif // _EVALUATION_HH
evaluation of the prediction
Definition evaluation.hh:28
Definition extrinsicinfo.hh:314
This class implements a double object with a very large range.
Definition lldouble.hh:31
Definition gene.hh:101
Definition gene.hh:250