Augustus 3.4.0
Loading...
Searching...
No Matches
intronmodel.hh
1/*
2 * intronmodel.hh
3 *
4 * License: Artistic License, see file LICENSE.TXT or
5 * https://opensource.org/licenses/artistic-license-1.0
6 */
7
8#ifndef _INTRONMODEL_HH
9#define _INTRONMODEL_HH
10
11#include "statemodel.hh"
12
13
// The intron model class. Derives from StateModel. Apart from itype, gweight
// and codon, every data member below is static, i.e. shared by all
// IntronModel objects. Only the small inline accessors have their bodies in
// this header; all other member functions are merely declared here.
20class IntronModel : public StateModel {
21public:
24
 // Returns the state type stored in this object (the private member itype).
25 StateType getStateType () const {
26 return itype;
27 }
28
 // Per-object operations; declarations only, bodies are not part of this header.
 // NOTE(review): parIndex presumably selects a parameter set (cf. lastParIndex,
 // "GC-index of current parameter set") - confirm against the implementation.
35 void buildModel( const AnnoSequence* annoseq, int parIndex );
36 void registerPars( Parameters* parameters);
 // suffix is optional and defaults to NULL.
42 void printProbabilities( int parIndex, BaseCount *bc, const char* suffix = NULL );
43 void initAlgorithms(Matrix<Double>&, int);
44 virtual void updateToLocalGCEach( Matrix<Double>& trans, int cur);
45 void viterbiForwardAndSampling(ViterbiMatrixType&, ViterbiMatrixType&, int, int,
46 AlgorithmVariant, OptionListItem&);
47 Double emiProbUnderModel (int begin, int end) const;
48 Double seqProb (int left, int right) const;
 // Class-level (static) interface operating on the shared parameters.
49 static Double dSSProb (int base, bool forwardStrand);
50 static Double aSSProb (int base, bool forwardStrand);
51 static void init();
 // Calls initSnippetProbs() and clears the initAlgorithmsCalled flag.
52 static void resetPars() {
53 initSnippetProbs();
54 initAlgorithmsCalled = false;
55 }
 // Both range arguments default to -1.
 // NOTE(review): -1 presumably means "no explicit range" - confirm in the implementation.
56 static void updateToLocalGC(int from = -1, int to = -1);
57 static void readProbabilities(int parIndex);
58 static void readAllParameters();
59 static void updateParameters(int idx);
60 static void storeGCPars(int idx);
 // Returns the static member d.
61 static Integer getD() {return d;}
 // Returns mal ("mean additional length", see below) converted via doubleValue().
62 static double getMAL() {return mal.doubleValue();}
 // Resets the static counter introncount to 0.
63 static void resetModelCount(){introncount = 0;};
 // Returns the static member geoProb.
64 static double getGeoProb(){return geoProb;}
 // Forwards q to assMotif->getProbThreshold(); dereferences assMotif without a
 // null check, so assMotif must point to a valid Motif when this is called.
65 static Double getAssMotifProbThreshold(double q) {return assMotif->getProbThreshold(q);}
66 static double getMeanIntrLen();
67private:
 // Private helpers; declarations only, bodies are not part of this header.
68 static void initSnippetProbs();
69 void processSequence( const char* start, const char* end);
73 void buildProbabilities ( const AnnoSequence* annoseq );
74 void buildLenDist ( const AnnoSequence* annoseq );
 // withMotif defaults to true.
75 void processASS ( const char* dna, int pos, Boolean withMotif=true);
76 void processDSS ( const char* dna, int pos );
77 void makeDSSProbs ( );
78 void storeIntronLengths ( const AnnoSequence* annoseq);
79 void printLengthQuantiles();
80 void initCountVars ( );
81 void readSpliceSites ();
82
 // Publicly accessible static data shared by all IntronModel objects.
 // NOTE(review): the GC* pointer members here and below appear to be arrays
 // with one entry per GC content class (as stated for GCprobShortIntron,
 // GCmal and GCassMotif) - confirm for GCemiprobs.
83public:
84 static Integer k;
85 static PatMMGroup emiprobs;
86 static PatMMGroup *GCemiprobs;
87 static BinnedMMGroup dssBinProbs;
88 static BinnedMMGroup assBinProbs;
89 static vector<Double> lenDist;
90private:
 // Per-object state (the only non-static data members of the class).
91 StateType itype;
92 Integer gweight;
 // Raw pointer; ownership and lifetime are not visible from this header.
93 char* codon;
 // Shared (static) state.
94 static int beginOfBioIntron, endOfBioIntron;
95 static vector<Integer> emicount;
96 static vector<Integer> intlencount;
97 static Integer introns; // number of introns
98 static Integer introns_d; // number of introns of length <= d
99 static Integer d;
100 static Integer minwindowcount;
101 static double slope_of_bandwidth;
102 static Double patpseudo;
103 static Double probShortIntron; // probability that intron is at most d long
104 static Double *GCprobShortIntron; // array for each GC content class
105 static Double mal; // mean additional length (if longer than d)
106 static Double *GCmal; // array for each GC content class
107 static Integer introncount;
108 static Integer gesbasen;
109
110 // variables related to splicing
111 static Integer c_ass;
112 static Integer c_dss;
113 static vector<Integer> asscount;
114 static vector<Integer> dsscount;
115 static Integer ass_upwindow_size;
116 static vector<Double> assprobs;
117 static vector<Double> dssprobs;
118 static Motif *assMotif; // basecounts of the window before the ass
119 static Motif *GCassMotif; // array of Motifs, one for each GC content class
120 static Boolean hasSpliceSites;
121 static Double asspseudo; // pseudocount for patterns in acceptor splice sites
122 static Double dsspseudo; // pseudocount for patterns in donor splice sites
123 static Double dssneighborfactor; // taken from the prob of the neighbor patterns
124 static Integer ass_motif_memory; // order of the markov chain in the ass upstream motif
125 static Integer ass_motif_radius; // radius of the pooling window in the ass upstream motif
126 static double non_gt_dss_prob;
127 static double non_ag_ass_prob;
128 static SnippetProbs *snippetProbs, *rSnippetProbs;
 // initAlgorithmsCalled is cleared by resetPars().
129 static bool initAlgorithmsCalled, haveSnippetProbs;
130 static int lastParIndex; // GC-index of current parameter set
131 static Integer verbosity;
132 static double geoProb;
133 static int ass_outside; // Constant::ass_upwindow_size + Constant::ass_start + ASS_MIDDLE;
134};
135
141public:
142 IntronModelError(string msg) : ProjectError(msg) {}
143};
144
145#endif // _INTRONMODEL_HH
Definition gene.hh:548
Definition motif.hh:33
Contains features for bins of a probability (or a score).
Definition merkmal.hh:120
Definition intronmodel.hh:140
The intron model class.
Definition intronmodel.hh:20
void buildModel(const AnnoSequence *annoseq, int parIndex)
Definition dummy.cc:13
void printProbabilities(int parIndex, BaseCount *bc, const char *suffix=NULL)
Definition dummy.cc:14
This class implements a double object with a very large range.
Definition lldouble.hh:31
A simple matrix class. Base class for all mathematical matrix objects.
Definition matrix.hh:27
Definition motif.hh:92
Options lists are used for sampling; items also in backtracking.
Definition vitmatrix.hh:748
Definition merkmal.hh:148
Contains a vector of parameters. Is used in particular for intron emiprobs.
Definition merkmal.hh:80
Definition types.hh:449
ProjectError()
Definition types.hh:460
Definition statemodel.hh:182
This is the base interface class common to all state model classes (ExonModel, IntronModel, ...).
Definition statemodel.hh:65
An array of Viterbi columns.
Definition vitmatrix.hh:687