Augustus 3.4.0
Loading...
Searching...
No Matches
orthoexon.hh
1/*
2 * orthoexon.hh
3 *
4 * License: Artistic License, see file LICENSE.TXT or
5 * https://opensource.org/licenses/artistic-license-1.0
6 */
7
8#ifndef _ORTHOEXON_HH
9#define _ORTHOEXON_HH
10
11#include "exoncand.hh"
12#include "projectio.hh"
13#include "types.hh"
14#include "phylotree.hh"
15#include "contTimeMC.hh"
16#include "codonevo.hh"
17#include "codonevodiscr.hh"
18
19#include <vector>
20#include <string>
21#include <list>
22
23extern const char* phyleticPatternIdentifiers[6];
24
25//forward declarations:
29class Node;
30
36class OrthoExon {
37public:
38 OrthoExon(int_fast64_t k, size_t numSpecies);
39 ~OrthoExon() {};
40
41 // get and and set functions
42 StateType getStateType() const; // all exon candidates agree in type
43 int numExons() const;
44 double getOmega() const { return omega;}
45 double getEomega() const { return Eomega;}
46 double getVarOmega() const { return VarOmega;}
47 double getLeftExtOmega() const { return leftBoundaryExtOmega;}
48 double getRightExtOmega() const { return rightBoundaryExtOmega;}
49 double getLeftIntOmega() const { return leftBoundaryIntOmega;}
50 double getRightIntOmega() const { return rightBoundaryIntOmega;}
51 double getClamsaProb() const { return probClamsa;}
52 double getClamsaScore() const {
53 if (probClamsa < 0.0 || probClamsa > 1.0)
54 return 0.0;
55 else if (probClamsa == 0.0) {
56 return -10.0;
57 } else if (probClamsa == 1.0) {
58 return 10.0;
59 } else {
60 return log(probClamsa/(1.0-probClamsa)); // logit value
61 }
62 }
63 double getSubst() const { return subst; }
64 double getConsScore() const {return cons;}
65 double getLeftConsScore() const {return leftCons;}
66 double getRightConsScore() const {return rightCons;}
67 double getDiversity() const {return diversity;}
68 size_t getContainment() const {return containment;}
69 bool hasOmega() const {return Eomega >= 0;}
70 bool hasVarOmega() const {return VarOmega >= 0;}
71 bool hasConservation() const {return cons >= 0;}
72 bool hasContainment() const {return containment >= 0;}
73 bool hasDiversity() const {return diversity >= 0;}
74 bit_vector getBV() const {return bv;}
75
76 /* getRFC
77 * @param[in] offsets integers to add to genome positions to obtain 0-based coordinates
78 * @return vector of reading frames
79 */
80 vector<int> getRFC(vector<int> offsets) const {
81 vector<int> rfc;
82 for (size_t s = 0; s < orthoex.size(); s++){
83 if (orthoex[s] == NULL)
84 rfc.push_back(-1);
85 else
86 rfc.push_back((offsets[s] + orthoex[s]->getFirstCodingBase()) % 3);
87 }
88 return rfc;
89 }
90
91 PhyloTree* getTree() const {return tree;}
92 int getAliStart() const {return (key>>22);} // start position of HECT in alignment
93 int getAliLen() const {int aliStart=getAliStart(); int n=key-(aliStart<<22); return (n>>7);} // length of HECT + 1
94 int getAliEnd() const {return getAliStart() + getAliLen();}
95 int getStartInWindow(int s) const {return firstAlignedPos[s];}
96 int getEndInWindow(int s) const {return lastAlignedPos[s];}
97 bool exonExists(int pos) const; // returns true if OE has a candidate exon at position pos
98 bool isUnaligned(int i) const {return labels[i] == 3;} // true, if species i is not aligned
99 bool isAbsent(int i) const {return labels[i] == 2;} // true, if species i is aligned, but ECs is absent
100 void setPresent(bit_vector v);
101 void setAbsent(bit_vector v);
102 void setOmega(double o){omega=o;}
103 void setOmega(vector<double>* llo, CodonEvo* codonevo, bool oeStart);
104 void storeOmega(double currOmega, double currVarOmega);
105 void setClamsa(const cumValues &cv, CodonEvoDiscr* codonevo, bool oeStart);
106 void setClamsa2(const cumValues &cv, CodonEvoDiscr* codonevo);
107 void storeClamsa(double currClamsa);
108 void setSubst(int s){ subst=s;}
109 void setSubst(int subs, bool oeStart);
110 void setConsScore(double c){cons=c;}
111 void setLeftConsScore(double c){leftCons=c;}
112 void setRightConsScore(double c){rightCons=c;}
113 void setDiversity(double d){diversity=d;}
114 void setContainment(int c) { containment = c; }
115 void setTree(PhyloTree* t);
116 void setBV(bit_vector b){bv = b;}
117 string getPhyleticPattern() const; // phyletic pattern: for an explanation, see .cc file
118 void setPhyleticPattern(map<int, list<int> > &pp_init, map<int, list<int> > &pp_opt);
119 vector<int> getRFC(vector<int> offsets);
120 double getLogRegScore() const;
121
122 vector<ExonCandidate*> orthoex;
123 vector<Node*> orthonode; //corresponding nodes in the graph
124 vector<double> weights;
125 /*
126 * labels:
127 * 0 - EC present, but not predicted as exon
128 * 1 - EC present and predicted as exon
129 * 2 - EC absent, but alignment present
130 * 3 - alignment not present
131 */
132 vector<int> labels;
133 int ID;
134 vector<int> firstAlignedPos;
135 vector<int> lastAlignedPos;
136
137private:
138 int_fast64_t key; // key encodes all of: aliStart aliEnd type lenMod3
139 double omega;
140 double Eomega;
141 double VarOmega;
142 double leftBoundaryExtOmega;
143 double rightBoundaryExtOmega;
144 double leftBoundaryIntOmega;
145 double rightBoundaryIntOmega;
146
147 double probClamsa;
148 double leftBoundaryExtClamsa;
149 double rightBoundaryExtClamsa;
150 double leftBoundaryIntClamsa;
151 double rightBoundaryIntClamsa;
152
153 list<vector<double> > loglikOmegaStarts;
154 list<vector<double> > loglikClamsaStarts;
155
156 int intervalCount;
157 int intervalCountClamsa;
158 int subst;
159 double cons; // conservation score
160 double leftCons; // conservation score of left boundary feature
161 double rightCons; // conservation score of right boundary feature
162 double diversity; // sum of branch lengths of the subtree induced by the OrthoExon (measure of phylogenetic diversity)
163 size_t containment; // how many bases overhang on average has the largest OrthoExon that includes this one in the same frame
164 bit_vector bv; // stores in one bit for each species its absence/presence (0/1)
165 PhyloTree *tree; // corresponding tree topology of an OE
166};
167
168/*
169 * read and write functions for orthologous exons
170 * TODO: - allow orthologous exons to be on different strands
171 * - substract offset; on reverse strand, start/end positions have to be made relative to the
172 * reverse complement
173 */
174// old code:
175//std::list<OrthoExon> readOrthoExons(std::string filename); //reads list of orthologous exons from a file
176//void writeOrthoExons(const std::list<OrthoExon> &all_orthoex);
177
178
179std::ostream& operator<<(std::ostream& ostrm, const OrthoExon &ex_tuple);
180std::istream& operator>>(std::istream& istrm, OrthoExon& ex_tuple);
181
182#endif
class CodonEvoDiscr
Definition codonevodiscr.hh:35
class CodonEvo
Definition codonevo.hh:30
Definition graph.hh:108
maintains orthologous exons for comparative gene prediction
Definition orthoexon.hh:36
Definition phylotree.hh:104
Definition types.hh:903