Augustus 3.4.0
Loading...
Searching...
No Matches
speciesgraph.hh
1/*
2 * speciesgraph.hh
3 *
4 * License: Artistic License, see file LICENSE.TXT or
5 * https://opensource.org/licenses/artistic-license-1.0
6 */
7
8#ifndef _SPECIESGRAPH
9#define _SPECIESGRAPH
10
11#include<unordered_map>
12
13//project includes
14#include "graph.hh"
15
16//forward declarations
17
21class Move;
22
33
34private:
35 AnnoSequence *seqRange;
36 list<ExonCandidate*> *additionalExons; //exons, which are not sampled
37 string speciesname;
38 Strand strand;
39 bool genesWithoutUTRs;
40 bool onlyCompleteGenes;
41 double ec_score; //temp: until there are real scores for exon candidates
42 ofstream *sampled_GFs; // output file of sampled exons/introns
43 bool overlapComp;
44
45
46public:
47 SpeciesGraph(list<Status> *states, AnnoSequence *seq, list<ExonCandidate*> *addEx, string name, Strand s, bool u, bool o, ofstream *gf, bool ov = false) :
48 AugustusGraph(states, seq->sequence),
49 seqRange(seq),
50 additionalExons(addEx),
51 speciesname(name),
52 strand(s),
53 genesWithoutUTRs(u),
54 onlyCompleteGenes(o),
55 sampled_GFs(gf),
56 overlapComp(ov)
57 {
58 try {
59 ec_score = Properties::getdoubleProperty("/CompPred/ec_score");
60 } catch (...) {
61 ec_score = alpha_e * x0_e - r_be;
62 }
63 }
65 // AnnoSequence must be deleted by caller (it is needed at other places)
66 }
67 using AugustusGraph::getKey;
68
69 /*
70 * functions to build graph
71 */
72
73 void buildGraph(double meanIntrLen=numeric_limits<double>::max());
74 /*
75 * @getKey(): the key of auxilary nodes is 'PosBegin:n_type'
76 * the key of nodes representiong Exons is 'PosBegin:PosEnd:StateType'
77 */
78 string getKey(Node *n);
79 void printGraph(string filename); // prints graph in dot-format
80 void printSampledGF(Status *st, double score=0); //prints sampled exons/introns to display them with gBrowse
81 void printGF(ExonCandidate *ec, double score=0.0, float avgBaseProb=0.0);
82 string getSpeciesname() const {return speciesname;}
83 char* getSequence() const {return seqRange->sequence;}
84 char* getSeqID() const {return seqRange->seqname;}
85 int getSeqOffset() const {return seqRange->offset;}
86 int getSeqLength() const {return seqRange->length;}
87 Strand getSeqStrand() const {return strand;}
88 AnnoSequence* getAnnoSeq() const {return seqRange;}
89 void printCurrentPath();
90 /*
91 * maximum weight path problem related functions
92 */
93
94 void topSort(); // sorts nodelist of graph topologically
95 void dfs(Node* node); // depth first search, subroutine of topSort()
96 double relax(); // relaxation of all nodes
97
98 // static functions
99 static void setECThold(double t){ec_thold=t;}
100 static void setICThold(double t){ic_thold=t;}
101 static void printWeights();
102
103 /*
104 * parameters that define the scoring function of exons and introns
105 * score of exons: ec_thold + a_0x_0 + a_1x_1 + ... + a_nx_n,
106 * where x_i are derived from the following set of features: posterior prob, average base prob, log length
107 * diversity, omega, variance of omega, conseration, containment, #species in a HECT
108 * and the a_i are parameters trained by logistic regression
109 */
110 static double ec_thold; // ec_thold + a_0*x_0 + a_1*x_1 + ... + a_n*x_n
111 static double ic_thold; // ic_thold + a_0*x_0 + a_1*x_1 + ... + a_n*x_n
112 static double maxCostOfExonLoss;
113
114 /*
115 * old code
116 */
117 /*double localChange(Move *move); // local path search with calculation of the score difference between new and old local path
118 double getScorePath(Node *begin, Node *end); // calc. the sum of edge weights of the path: begin ~~> end
119 Node* getTopSortPred(Node *node); //get next node in topological order which is on the path
120 Node* getTopSortNext(Node *node); //get previous node in topological order which is on the path
121 Node* getNextExonOnPath(Node *node, size_t step); // returns the i-th next exon on the current path, where i is the step size (size_t step)
122 Node* getPredExonOnPath(Node *node, size_t step); // returns the i-th preceding exon on the current path
123 void printNode(Node *node); //print Node with offset
124 */
125
126private:
127 /*
128 * subroutines of buildGraph()
129 */
130 template<class T> Node* addExon(T *exon, vector< vector<Node*> > &neutralLines, unordered_map<int32_t,Node*> &auxiliaryNodes); // add an exon to the graph
131 void addIntron(Node* pred, Node* succ, Status *intr); // add an intron to the graph
132 Node* addLeftSS(Status *exon, vector< vector<Node*> > &neutralLines, unordered_map<int32_t,Node*> &auxiliaryNodes);
133 Node* addRightSS(Status *exon, vector< vector<Node*> > &neutralLines, unordered_map<int32_t,Node*> &auxiliaryNodes);
134 Node* addAuxilaryNode(NodeType type, int pos, vector< vector<Node*> > &neutralLines, unordered_map<int32_t,Node*> &auxiliaryNodes); // add an auxilary node to the graph if it does not exist already
135 Node* getAuxilaryNode(NodeType type, int pos, unordered_map<int32_t,Node*> &auxiliaryNodes) const; // get an auxilary node (returns NULL if node does not exists)
136 void addAuxilaryEdge(Node *pred, Node *succ); // add an auxilary edge to the graph
137 Node* addAuxNodeToLine(NodeType type, int pos, vector< vector<Node*> >&neutralLines); // add an auxilary node to one of the neutral lines if it does not exist already
138 NodeType getPredType(StateType type, int begin, int end) ; // get the type of gene feature that precedes an exon
139 NodeType getSuccType(StateType type) ; // get the type of gene feature that succeeds an exon
140 list<NodeType> getPredTypes(Node *node) ; // same as above, but includes stop codon types
141 list<NodeType> getSuccTypes(Node *node) ; // same as above, but includes stop codon types
142 bool isGeneStart(Node *exon);
143 bool isGeneEnd(Node *exon);
144 Node* addNode(Status *exon); // add a sampled CDS or UTR exon
145 Node* addNode(ExonCandidate *exon); // add an additional candidate exon
146 Node* addNode(NodeType type, int pos); // add an auxilary node
147 /*
148 * subroutines of printGraph()
149 */
150 string getDotNodeAttributes(Node *node);
151 string getDotEdgeAttributes(Node *pred, Edge *edge);
152};
153
154/*
155 * old code: optimize cgp by making small moves
156 */
157/*struct MoveNode{
158
159 Node *node;
160 double weight;
161
162 MoveNode(Node* n, double w) : node(n), weight(w) {}
163 ~MoveNode() {}
164};
165
166struct MoveEdge{
167
168 Edge *edge;
169 double weight;
170
171 MoveEdge(Edge* e, double w) : edge(e), weight(w) {}
172 ~MoveEdge() {}
173};
174
175class Move{
176
177private:
178 SpeciesGraph *graph;
179 size_t step_size;
180 list<MoveNode> nodes; // sorted list of nodes
181 list<MoveEdge> edges; // sorted list of edges
182 Node* local_head;
183 Node* local_tail;
184
185public:
186 Move(SpeciesGraph *g, size_t s = 1) :
187 graph(g),
188 step_size(s),
189 local_head(NULL),
190 local_tail(NULL)
191 {}
192 ~Move() {}
193
194 Node* getHead() const {return local_head;}
195 Node* getTail() const {return local_tail;}
196 void addNodeBack(Node* node, double weight){nodes.push_back(MoveNode(node, weight));}
197 void addEdgeBack(Edge* edge, double weight){edges.push_back(MoveEdge(edge, weight));}
198 void addNodeFront(Node* node, double weight){nodes.push_front(MoveNode(node, weight));}
199 void addEdgeFront(Edge* edge, double weight){edges.push_front(MoveEdge(edge, weight));}
200 Node* getNodeBack() const {return nodes.back().node;}
201 Node* getNodeFront() const {return nodes.front().node;}
202 bool nodesIsEmpty() const {return nodes.empty();}
203 void shiftHead(size_t step = 1){local_head = graph->getPredExonOnPath(local_head,step);}
204 void shiftTail(size_t step = 1){local_tail = graph->getNextExonOnPath(local_tail,step);}
205 void addWeights();
206 void undoAddWeights();
207 void initLocalHeadandTail(); //determines local_head and local_tail
208};
209*/
210#endif // _SPECIESGRAPH
211
Definition gene.hh:548
Definition graph.hh:243
Definition graph.hh:141
Generation of exon candidates (=possible exons)
Definition exoncand.hh:45
Definition graph.hh:108
builds a directed acyclic graph from a set of sampled genes.
Definition speciesgraph.hh:32
Definition graph.hh:78