Augustus 3.4.0
Loading...
Searching...
No Matches
codonevo.hh
1/*
2 * codonevo.hh
3 *
4 * License: Artistic License, see file LICENSE.TXT or
5 * https://opensource.org/licenses/artistic-license-1.0
6 */
7
8#ifndef _CODONEVO_HH
9#define _CODONEVO_HH
10
11#include "contTimeMC.hh"
12#include "geneticcode.hh"
13
14//forward declarations
15class PhyloTree;
16
30class CodonEvo : public Evo {
31public:
32 CodonEvo() : Evo(64), k(0), kappa(4.0) {
33 // initialize pi with the uniform distribution on all sense codons
34 pi = new double[64];
35 double normsum = 0.0;
36 for (unsigned c=0; c<64; c++){
37 if (GeneticCode::translate(c) == '*')
38 pi[c] = 0.0;
39 else
40 pi[c] = 1.0;
41 normsum += pi[c];
42 }
43
44 for (int c=0; c<64; c++)
45 pi[c] /= normsum;
46 };
47 ~CodonEvo();
48 void setKappa(double kappa){ this->kappa = kappa;} // transition/transversion ratio
49 void setPi(double *pi); // codon usage
50
51 /*
52 * Chooses k values for omega around 1, e.g. 0.8, 1, 1.2 for k=3.
53 * Later, one can see which of these values of omega gives the maximum likelihood.
54 */
55 void setOmegas(int k);
56 void setPrior(double sigma = 0.2);
57 double getPrior(int u){return omegaPrior[u];}
58 vector<double> *getOmegas(){return &omegas;}
59 double getOmega(int u){return omegas[u];}
60 int getK(){ return k;}
61 void printOmegas();
62 void setAAPostProbs();
63 void getRateMatrices(); // precomputes or reads the rate matrices
64 void writeRateMatrices(string filename); // serialize to file
65 void readRateMatrices(string filename); // deserialize from file
66 void computeLogPmatrices(); // precomputes and stores the array of matrices
67
68
69 /*
70 * Returns a pointer to the 64x64 substitution probability matrix
71 * for which t and omega come closest to scored values.
72 */
73 gsl_matrix *getSubMatrixLogP(double omega, double t);
74
75 /*
76 * Computes the substitution probability
77 * P( X(t)=to | X(0)=from, omega), where 1 <= from,to <= 64 are codons
78 * If you want to call this for many values of 'from' and 'to', use rather getSubMatrixP above.
79 */
80 double subLogProb(int from, int to, double omega, double t){
81 gsl_matrix *P = getSubMatrixLogP(omega, t);
82 return gsl_matrix_get(P, from, to);
83 }
84 /*
85 * log likelihood of exon candidate pair e1, e1 (required: equal number of codons, gapless)
86 * Used for testing. The pruning algorithm requires tuples of codons instead.
87 */
88 double logLik(const char *e1, const char *e2, int u, double t); // output variables
89 /*
90 * Estimate selection omega on pair of codon sequences.
91 */
92 double estOmegaOnSeqPair(const char *e1, const char *e2, double t, // input variables
93 int& numAliCodons, int &numSynSubst, int &numNonSynSubst); // output variables
94 /*
95 * Estimate omega on a sequence of codon tuples.
96 * only for testing, may need adjustment
97 */
98 double estOmegaOnSeqTuple(vector<string> &seqtuple, PhyloTree *tree,
99 int &subst); // output variables
100 // store log likeliehoods for all omegas for one codon tuple
101
102 vector<double> loglikForCodonTuple(vector<string> &seqtuple, PhyloTree *ctree){
103 int i = 0;
104 return loglikForCodonTuple(seqtuple, ctree, NULL, i);
105 }
106 vector<double> loglikForCodonTuple(vector<string> &seqtuple, PhyloTree *ctree, PhyloTree *tree, int &subs);
107 // for GRK proposal
108 double graphOmegaOnCodonAli(vector<string> &seqtuple, PhyloTree *tree, int refSpeciesIdx = 0);
109 /*
110 * add new branch length b
111 * this function is currently not needed, since the pruning algorithm does change the phylogenetic tree
112 */
113 void addBranchLength(double b){}
114
115private:
116 int k; // number of different omega values for which P's are stored
117 double kappa;
118 vector<double> omegas; // sorted vector of omegas (contains values below, around and above 1)
119 vector<double> omegaPrior; // prior distribution on omega, centered at 1
120 vector<double> aaUsage; // amino acid usage for incorporation into rate matrix
121 vector<vector<double> > aaPostProb; // retreived from BLOSUM (amino acid substitution rate matrix)
122};
123
124/*
125 * 64x64 rate matrix Q for codon substitutions as in Yang, "Computational Molecular Evolution", (2.7)
126 */
127
128gsl_matrix *getCodonRateMatrix(double *pi, // codon usage, normalized vector with 64 elements
129 double omega, // dN/dS, nonsynonymous/synonymous ratio
130 double kappa, // transition/transversion ratio, usually >1
131 vector<vector<double> > *aaPostProb = NULL); // posterior probs for AA substitutions
132
133gsl_matrix *getNonCodingRateMatrix(vector<double> *pi_nuc, double kappa); // rate matrix for non-coding model
134
135#endif // _CODONEVO_HH
class CodonEvo
Definition codonevo.hh:30
abstract base class for codon and exon evolution
Definition contTimeMC.hh:31
Definition phylotree.hh:104