Augustus 3.4.0
Loading...
Searching...
No Matches
graph.hh
1/*
2 * graph.hh
3 *
4 * License: Artistic License, see file LICENSE.TXT or
5 * https://opensource.org/licenses/artistic-license-1.0
6 */
7
8#ifndef _GRAPH_HH
9#define _GRAPH_HH
10
11#include <map>
12#include <sstream>
13#include <queue>
14#include <iostream>
15#include <stack>
16#include "gene.hh"
17#include "properties.hh"
18#include "exoncand.hh"
19
20using namespace std;
21
22#define NUM_STATENAMES 6
23
27enum Statename{type_unknown=-1, CDS, utr3, utr5, intron, utr3Intron, utr5Intron};
28
29extern string stateNameIdentifiers[NUM_STATENAMES];
30
31/* types of nodes:
32 * sampled exons and introns: sampled
33 * additional exons, which are not sampled: unsampled_exons
34 * neutral nodes: IR, plus0, plus1, plus2, minus0, minus1, minus2 (for each of the 7 neutral lines one type)
35 * NOT_KNOWN: default type, for example head and tail
36 */
37#define NUM_NODETYPES 27
38
48enum NodeType{NOT_KNOWN=-1, IR,
49 plus0, plus1, plus2, minus0, minus1, minus2, T_plus1, TA_plus2, TG_plus2, T_minus1, C_minus1, YY_minus0, // intron types between two CDS exons
50 ncintr, rncintr, // intron between two non-coding exons
51 utr5intr, TLstart, TLstop, utr3intr, rutr5intr, rTLstart, rTLstop, rutr3intr, utrExon, // utr types
52 sampled, unsampled_exon}; // CDS types
53
54extern string nodeTypeIdentifiers[NUM_NODETYPES];
55
61class Status;
62
66class Node;
67
71class Edge;
72
76class Graph;
77
78class Status{
79public:
80 Status(Statename s=type_unknown, int b=0, int e=0, double sc=0.0, const void *it=NULL, Status *n=NULL):
81 name(s),
82 begin(b),
83 end(e),
84 score(sc),
85 next(n),
86 item(it)
87 {}
88 Statename name;
89 int begin, end;
90 double score;
91 Status *next;
92 const void *item;
93
94 bool isIntron() const {return (name >= intron);}
95 bool isExon() const {return (name >= CDS && name <= utr5);}
96 bool isCDS() const {return (name == CDS);}
97 bool isUTR() const {return (name == utr3 || name == utr5);}
98 float getPostProb() const {return (item)? ((State*)item)->apostprob : 0.0;}
99 int getLen() const {return end-begin+1;}
100 int getFrame() const {return isCDS()? ((State*)item)->frame() : -1;}
101 bool hasEvidence() const {return ((State*)item)->evidence;}
102 bool hasEvidence(string srcname) const; //returns true if the exon/intron has evidence from src="srcname"
103 int numEvidence() const {return hasEvidence()? ((State*)item)->evidence->numEvidence : 0;}
104};
105
106bool isTlstartOrstop(Status *predExon, Status *succExon);
107
108class Node{
109public:
110 Node(int s=0, int e=0, float sc=0.0, const void *it=NULL, NodeType t=NOT_KNOWN, Node *p=NULL, bool b=0, Node *nn=NULL, Node *pn=NULL):
111 begin(s),
112 end(e),
113 score(sc),
114 item(it),
115 n_type(t),
116 pred(p),
117 label(b),
118 nextNontrivialNeutNode(nn),
119 prevNontrivialNeutNode(pn)
120 {}
121 int begin, end;
122 float score;
123 const void *item;
124 NodeType n_type;
125 Node *pred;
126 bool label; // label is 1, if node is in path, else label is 0
127 list<Edge> edges;
128 Node *nextNontrivialNeutNode; // pointer to Node on neutral line, which is the nearest that can be reached by a nontrivial way (used for new back edges)
129 Node *prevNontrivialNeutNode; // pointer to Node on neutral line, from which the actual node is the nearest that can be reached by a nontrivial way (used for new back edges)
130 size_t index; // (used for new back edges) (used for tarjan)
131 size_t lowlink; // (used for tarjan)
132
133 StateType castToStateType(); //casts void* back to State* and returns the StateType
134 void addWeight(float weight); //add weight to all outgoing edges
135 Edge* getEdge(Node* succ);
136 State* getIntron(Node* succ);
137 bool isSampled() const {return(n_type == sampled || n_type == utrExon);}
138
139};
140
141class Edge{
142public:
143 Edge(Node *t=NULL, bool n=true, float sc=0.0, const void *it=NULL):
144 to(t),
145 score(sc),
146 neutral(n),
147 item(it)
148 {}
149 Node *to;
150 float score;
151 bool neutral;
152 const void *item;
153
154 inline bool isSampledIntron(){
155 return item;
156 }
157};
158
159//print functions for Nodes and Edges
160ostream& operator<<(ostream& ostrm, Node *node);
161ostream& operator<<(ostream& ostrm, const Edge &edge);
162
163/*
164 * ordering needed by statelist:
165 * the states of the sampled genes need to be together ordered after their beginning for each gene.
166 * example: 5'UTR-CDS-intron-CDS-intron-CDS-3'UTR-5'UTR-CDS-intron-CDS-3'UTR
167 * here we have 2 Genes and states in order of appearance
168 */
169
170class Graph{
171public:
172 Graph(list<Status> *states) : statelist(states) {}
173 virtual ~Graph();
174 void addBackEdges();
175 void addBackEdgesComp();
176 void tarjan(); // Tarjan's strongly connected components algorithm
177 void tarjanAlg(Node* from, stack<Node*> &S, size_t &index);
178
179 list<Node*> nodelist; //stores all nodes belonging to the graph
180 list<Status> *statelist;
181 int min, max;
182 map<string,Node*> existingNodes;
183 Node *head;
184 Node *tail;
185
186 void buildGraph(); //needs to be called in constructor of derived class
187
188 template<class T> inline bool alreadyProcessed(T *temp){
189 return(existingNodes[getKey(temp)]!=NULL);
190 }
191 inline bool alreadyProcessed(string key){
192 return(existingNodes[key]!=NULL);
193 }
194 template<class T> inline Node* getNode(T *temp){
195 return existingNodes[getKey(temp)];
196 }
197 inline Node* getNode(string key){
198 return existingNodes[key];
199 }
200 // functions needed to build the graph
201protected:
202 bool edgeExists(Node *e1, Node *e2);
203 inline void addToHash(Node *n){
204 existingNodes[getKey(n)] = n;
205 }
206 Node* addExon(Status *exon, vector<Node*> &neutralLine);
207 void addPair(Status *exon1, Status *exon2, vector<Node*> &neutralLine);
208 void createNeutralLine(vector<Node*> &neutralLine, double weight=0.0, bool onlyComplete=false);
209 void addCompatibleEdges();
210 void insertIntron(Node *exon1, Node *exon2);
211 int minInQueue(queue<Node*> *q);
212 bool nonneutralIncomingEdge(Node *exon);
213 void printGraphToShell();
214 void getSizeNeutralLine();
215 void addWeightToEdge();
216
217 // program specific functions
218 virtual bool exonAtGeneStart(Status *st)=0;
219 virtual bool exonAtGeneEnd(Status *st)=0;
220 virtual string getKey(Node *n)=0;
221 virtual string getKey(Status *st)=0;
222 virtual string getKey(State *st)=0;
223 virtual string getKey(ExonCandidate *exoncand)=0;
224 virtual double getIntronScore(Status *predExon, Status *nextExon)=0;
225 virtual void addEdgeFromHead(Status *exon)=0;
226 virtual void addEdgeToTail(Status *exon)=0;
227 virtual bool compatible(Node *exon1, Node *exon2)=0;
228 virtual double setScore(Status *st)=0;
229 virtual void calculateBaseScores()=0;
230 virtual void printGraph(string filename)=0;
231 virtual void printGraph2(string filename)=0;
232 virtual bool mergedStopcodon(Node* exon1, Node* exon2)=0;
233 virtual bool mergedStopcodon(Status* exon1, Status* exon2)=0;
234 virtual bool mergedStopcodon(StateType type1, StateType type2, int end1, int begin2)=0;
235 virtual float getAvgBaseProb(Status *st) =0;
236 virtual float getAvgBaseProb(ExonCandidate *ec)=0;
237
238};
239
243class AugustusGraph : public Graph{
244
245public:
246 AugustusGraph(list<Status> *states, const char* dna) : Graph(states), sequence(dna), seqlength(strlen(dna)){
247 try {
248 utr = Properties::getBoolProperty("UTR");
249 } catch (...) {
250 utr = false;
251 }
252 try {
253 alpha_e = Properties::getdoubleProperty("/MeaPrediction/alpha_E");
254 } catch (...) {
255 alpha_e = 1;
256 }
257 try {
258 alpha_i = Properties::getdoubleProperty("/MeaPrediction/alpha_I");
259 } catch (...) {
260 alpha_i = 1;
261 }
262 try {
263 x0_e = Properties::getdoubleProperty("/MeaPrediction/x0_E");
264 } catch (...) {
265 x0_e = -10;
266 }
267 try {
268 x0_i = Properties::getdoubleProperty("/MeaPrediction/x0_I");
269 } catch (...) {
270 x0_i = -10;
271 }
272 try {
273 x1_e = Properties::getdoubleProperty("/MeaPrediction/x1_E");
274 } catch (...) {
275 x1_e = 10;
276 }
277 try {
278 x1_i = Properties::getdoubleProperty("/MeaPrediction/x1_I");
279 } catch (...) {
280 x1_i = 10;
281 }
282 try {
283 y0_e = Properties::getdoubleProperty("/MeaPrediction/y0_E");
284 } catch (...) {
285 y0_e = 0.5;
286 }
287 try {
288 y0_i = Properties::getdoubleProperty("/MeaPrediction/y0_I");
289 } catch (...) {
290 y0_i = 0.5;
291 }
292 try {
293 i1_e = Properties::getdoubleProperty("/MeaPrediction/i1_E");
294 } catch (...) {
295 i1_e = 0.25;
296 }
297 try {
298 i1_i = Properties::getdoubleProperty("/MeaPrediction/i1_I");
299 } catch (...) {
300 i1_i = 0.25;
301 }
302 try {
303 i2_e = Properties::getdoubleProperty("/MeaPrediction/i2_E");
304 } catch (...) {
305 i2_e = 0.75;
306 }
307 try {
308 i2_i = Properties::getdoubleProperty("/MeaPrediction/i2_I");
309 } catch (...) {
310 i2_i = 0.75;
311 }
312 try {
313 j1_e = Properties::getdoubleProperty("/MeaPrediction/j1_E");
314 } catch (...) {
315 j1_e = -5;
316 }
317 try {
318 j1_i = Properties::getdoubleProperty("/MeaPrediction/j1_I");
319 } catch (...) {
320 j1_i = -5;
321 }
322 try {
323 j2_e = Properties::getdoubleProperty("/MeaPrediction/j2_E");
324 } catch (...) {
325 j2_e = 5;
326 }
327 try {
328 j2_i = Properties::getdoubleProperty("/MeaPrediction/j2_I");
329 } catch (...) {
330 j2_i = 5;
331 }
332 try {
333 r_be = Properties::getdoubleProperty("/MeaPrediction/r_be"); // threshold for exons on base level
334 } catch (...) {
335 r_be = 0.5;
336 }
337 try {
338 r_bi = Properties::getdoubleProperty("/MeaPrediction/r_bi"); // threshold for introns on base level
339 } catch (...) {
340 r_bi = 0.5;
341 }
342
343 for(int i = 0; i < seqlength*10; i++)
344 baseScore.push_back(0);
345
346 }
347 bool exonAtGeneStart(Status *st);
348 bool exonAtGeneEnd(Status *st);
349 bool exonAtCodingStart(Node *st);
350 bool exonAtCodingEnd(Node *st);
351 string getKey(Node *n);
352 string getKey(Status *st);
353 string getKey(State *st);
354 string getKey(ExonCandidate *exoncand);
355 double getIntronScore(Status *predExon, Status *nextExon);
356 void addEdgeFromHead(Status *exon);
357 void addEdgeToTail(Status *exon);
358 bool compatible(Node *exon1, Node *exon2);
359 bool sameStrand(StateType typeA, StateType typeB);
360 bool sameReadingFrame(Node *e1, Node *e2);
361 void calculateBaseScores();
362 double setScore(Status *st);
363 int getBasetype(Status *st, int pos);
364 void printGraph(string filename);
365 void printGraph2(string filename);
366 bool mergedStopcodon(Node* exon1, Node* exon2);
367 bool mergedStopcodon(Status* exon1, Status* exon2);
368 bool mergedStopcodon(StateType type1, StateType type2, int end1, int begin2);
369 void getPoints(Status *st, double p, double *a1, double *a2, double *b1, double *b2);
370 float getAvgBaseProb(Status *st);
371 float getAvgBaseProb(ExonCandidate *ec);
372
373 const char* sequence;
374 int seqlength;
375 vector<double> baseScore;
376 bool utr;
377
378 // parameters for scores
379 double alpha_e;
380 double alpha_i;
381 double x0_e;
382 double x0_i;
383 double x1_e;
384 double x1_i;
385 double y0_e;
386 double y0_i;
387 double i1_e;
388 double i1_i;
389 double i2_e;
390 double i2_i;
391 double j1_e;
392 double j1_i;
393 double j2_e;
394 double j2_i;
395 double r_be;
396 double r_bi;
397};
398
399bool compareNodes(Node *first, Node *second);
400bool compareEdges(Edge first, Edge second);
401bool compareNodeEnds(const Node *first, const Node *second);
402
403template <class T>
404inline string to_string (const T& t)
405{
406 stringstream ss;
407 ss << t;
408 return ss.str();
409}
410
411#endif
Definition graph.hh:243
Definition graph.hh:141
Generation of exon candidates (=possible exons)
Definition exoncand.hh:45
Definition graph.hh:170
Definition graph.hh:108
Definition gene.hh:101
Definition graph.hh:78