Augustus 3.4.0
Loading...
Searching...
No Matches
contTimeMC.hh
1/*
2 * contTimeMC.hh
3 * @author Mario Stanke
4 * @author Stefanie König
5 *
6 * License: Artistic License, see file LICENSE.TXT or
7 * https://opensource.org/licenses/artistic-license-1.0
8 */
9
10#ifndef _CONTTIMEMC_HH
11#define _CONTTIMEMC_HH
12
13#include "matrix.hh"
14
15// standard C/C++ includes
16#include <cmath>
17#include <iostream>
18#include <gsl/gsl_matrix.h>
19#include <gsl/gsl_eigen.h>
20#include <gsl/gsl_linalg.h>
21
22
23using namespace std;
24
31class Evo {
32public:
33 Evo(int s) : states(s), m(0), pi(NULL) {};
34 virtual ~Evo();
35 int getNumStates(){return states;}
36
37 /* Determine the branch lengths for which matrices P should be stored, at most m.
38 * For trees with few species, say <10, this could be all branch lengths b occurring
39 * in the tree. A value of m=-1 means to store all lengths in b. Memory may be saved
40 * by building m clusters roughly representing all lengths in b.
41 */
42 void setBranchLengths(vector<double> b, int m=-1);
43 void printBranchLengths();
44
45 virtual void getRateMatrices() {}; // precomputes or reads the rate matrices
46 virtual void computeLogPmatrices()=0; // precomputes and stores the array of matrices
47 virtual void addBranchLength(double b)=0; //add new branch length b and matrix P(b)
48
49 double getPi(int i) const {return pi[i];} //equilibrium frequency of state i
50 double getLogPi(int i) const {return log(getPi(i));}
51
52 gsl_matrix *getSubMatrixQ(int u){ return allQs[u]; }
53
54 /*
55 * Returns a pointer to the probability matrix for which t comes closest.
56 * u is the index to a second parameter for which the probability matrices
57 * are stored for different values.
58 * in the case of codon evolution u is the index to omega (dN/dS, selection)
59 * in the case of exon evolution probability matrices currently only
60 * depend on t and thus u is set to 0. It is however conceivable
61 * to allow different rates of exon loss in future.
62 */
63 gsl_matrix *getSubMatrixP(int u, double t);
64 gsl_matrix *getSubMatrixLogP(int u, double t);
65
66 /*
67 * compute the matrix exponential P(t) = exp(Q*t),
68 * where Q is given by U, diag(lambda) and U^{-1} as computed by eigendecompose
69 */
70 gsl_matrix *expQt(double t, gsl_vector *lambda, gsl_matrix *U, gsl_matrix *Uinv);
71
72protected:
73 int findClosestIndex(vector<double> &v, double val);
74
75protected:
76 const int states; //number of states (64 in codon model and (currently) 2 in exon model)
77 int m; // number of branch lengths (times) for which P's are stored
78 double *pi;
79 vector<double> times; // sorted vector of branch lengths
80 vector<gsl_matrix *> allQs; // rate matrices
81 Matrix<gsl_matrix *> allPs; // Parameterized probability matrices
82 Matrix<gsl_matrix *> allLogPs; // Parameterized log probability matrices
83
84};
85
86/*
87 * perform a decompososition of the rate matrix as Q = U * diag(lambda) * U^{-1}
88 * returns 0 if sucessful
89 */
90int eigendecompose(gsl_matrix *Q, // input
91 double *pi, // input, must be same as in construction of Q
92 gsl_vector *&lambda, // output, memory for lambda, U and Uinv is allocated within the function
93 gsl_matrix *&U, // output
94 gsl_matrix *&Uinv); // output
95
96// eigen decomposition was successful, if Q - U * diag(lambda) * Uinv is close to the 0-matrix
97void validateEigenDecomp(gsl_matrix *Q, gsl_vector *lambda, gsl_matrix *U, gsl_matrix *Uinv, int states);
98
99void printCodonMatrix(gsl_matrix *M);
100
101/*
102 * computes the element-wise natrual logarithm of a matrix P
103 */
104gsl_matrix *log(gsl_matrix *P, int states);
105
106/*
107 * @brief class ExonEvo
108 * @details Computes and stores 2x2 probability matrices for exon gain/loss events
109 * P(t) = exp(Q(lambda, mu)*t) for a sample of values for t
110 * and a fixed given value for lambda (rate of exon gain) and mu (rate of exon loss)
111 * Matrices are precomputed and retrieval of P(t) is then a constant time lookup
112 *
113 * @author Mario Stanke
114 * @author Stefanie König
115 */
116class ExonEvo : public Evo{
117
118public:
119 ExonEvo(int num_states=2) : Evo(num_states), U(NULL), Uinv(NULL), l(NULL) {
120 setLambda();
121 setMu();
122 setAliErr();
123 // mu, lambda and ali_error have to be set before calculating equilibrium frequencies Pi
124 setPi();
125 };
126 ExonEvo(int num_states, double lambda, double mu, double ali_error)
127 : Evo(num_states), U(NULL), Uinv(NULL), l(NULL) {
128 setLambda(lambda, false);
129 setMu(mu, false);
130 setAliErr(ali_error, false);
131 // mu, lambda and ali_error have to be set before calculating equilibrium frequencies Pi
132 setPi();
133 };
134 ~ExonEvo(){
135 if (U)
136 gsl_matrix_free(U);
137 if (Uinv)
138 gsl_matrix_free(Uinv);
139 if (l)
140 gsl_vector_free(l);
141 }
142 void setPi();
143 void setLambda(double lambda = 0.0001, bool propsOverride = true);
144 void setMu(double mu = 0.0001, bool propsOverride = true);
145 void setAliErr(double ali_error = 0.1, bool propsOverride = true);
146 double getLambda() const{return lambda;}
147 double getMu() const{return mu;}
148 double getAliErr() const{return ali_error;}
149
150 void getRateMatrices(); // precomputes or reads the rate matrices
151 void computeLogPmatrices();
152 void addBranchLength(double b); //add new branch length b and matrix P(b)
153 double minBranchLength() const {return times.front();}
154 int eigendecompose(gsl_matrix *Q);
155 gsl_matrix *expQt(double t) {return Evo::expQt(t,l,U,Uinv);}
156 gsl_matrix *getExonRateMatrix();
157 void validateEigenDecomp(gsl_matrix *Q){::validateEigenDecomp(Q,l,U,Uinv,states);}
158
159private:
160 double mu; // rate for exon loss
161 double lambda; // rate for exon gain
162 double ali_error; // rate for alignment error
163
164 // eigen decomposition of exon rate matrix Q = U * diag(l_1,...,l_n) * Uinv
165 gsl_matrix *U;
166 gsl_matrix *Uinv;
167 gsl_vector *l;
168};
169
170/*
171 * @brief class Parsimony
172 * @details transitions from state i to state j are independent of the branch length
173 * and cost -1 if i!=j and 0 if i==j.
174 * can be used to calculate the minimum number of codon substitutions
175 * for a given tree topology
176 *
177 * @author Mario Stanke
178 * @author Stefanie König
179 */
180class Parsimony : public Evo{
181
182public:
183 Parsimony() : Evo(64) {
184 this->m = 1;
185 this->times.push_back(1);
186 this->pi = new double[64];
187 for(int i=0; i<64; i++){
188 pi[i]=1;
189 }
190 };
191 void computeLogPmatrices();
192 void addBranchLength(double b){}
193};
194#endif // _CONTTIMEMC_HH
abstract base class for codon and exon evolution
Definition contTimeMC.hh:31
Definition contTimeMC.hh:116
A simple matrix class. Base class for all mathematical matrix objects.
Definition matrix.hh:27
Definition contTimeMC.hh:180