Augustus 3.4.0
Loading...
Searching...
No Matches
utrmodel.hh
1/*
2 * utrmodel.hh
3 *
4 * License: Artistic License, see file LICENSE.TXT or
5 * https://opensource.org/licenses/artistic-license-1.0
6 */
7
8#ifndef _UTRMODEL_HH
9#define _UTRMODEL_HH
10
11#include "statemodel.hh"
12
13
// Untranslated Region (UTR) model: a state model class derived from StateModel.
// Apart from the small inline helpers (getStateType, resetPars, resetModelCount,
// setTtsSpacing) every member function is only declared in this class body.
19class UtrModel : public StateModel {
20public:
21 UtrModel();
22 ~UtrModel();
23
 // Returns the value of the per-instance member utype.
24 StateType getStateType( ) const {
25 return utype;
26 }
27
 // Model building and per-feature processing (declarations only).
 // Presumably the process5*/process3* families handle 5' and 3' UTR features
 // respectively -- confirm against the implementation file.
34 void buildModel( const AnnoSequence* annoseq, int parIndex );
35 void registerPars( Parameters* parameters);
36 void processStates( const Gene* gene );
37 void process5SingleExon( const State* exon, bool withLen=true );
38 void process5InitialExon( const State* exon, bool withLen=true );
39 void process5InternalExon( const State* exon);
40 void process5TerminalExon( const State* exon);
41 void process5Intron( int begin, int end);
42 void process3SingleExon( const State* exon, bool withLen=true );
43 void process3InitialExon( const State* exon, bool withLen=true );
44 void process3InternalExon( const State* exon);
 // NOTE(review): unlike the other withLen parameters above, this one has no default
 // value, so callers must always pass it explicitly -- confirm this is intended.
45 void process3TerminalExon( const State* exon, bool withLen);
46 void process3Intron( int begin, int end);
47
48
 // Output, algorithm entry points and probability queries (declarations only).
 // suffix defaults to NULL.
54 void printProbabilities ( int zusNumber, BaseCount *bc, const char* suffix = NULL );
55 void initAlgorithms ( Matrix<Double>&, int);
56 void viterbiForwardAndSampling(ViterbiMatrixType&, ViterbiMatrixType&, int, int,
57 AlgorithmVariant, OptionListItem&);
58 Double emiProbUnderModel (int begin, int end) const;
59 Double endPartEmiProb (int begin, int end, int endOfBioExon) const;
60 Double notEndPartEmiProb (int begin, int end, int endOfBioExon, Feature *exonparts) const;
 // beginOfEndPart and endOfBioExon are taken by non-const reference (output parameters,
 // presumably filled in from 'end' -- confirm in the implementation).
61 void getEndPositions ( int end, int &beginOfEndPart, int &endOfBioExon) const;
62 Double tssupSeqProb ( int left, int right, bool reverse) const;
63 Double tssProb ( int left) const;
64 static void computeTtsProbs (int from, int to);
65 static void init();
 // Does nothing while utrcount is 0; otherwise calls initSnippetProbs() and
 // clears the initAlgorithmsCalled flag.
66 static void resetPars(){
67 if (utrcount == 0)
68 return;
69 initSnippetProbs();
70 initAlgorithmsCalled = false;
71 }
 // Both range arguments default to -1.
72 static void updateToLocalGC(int from = -1, int to = -1);
73 static void clearSegProbs();
74 static void readProbabilities(int zusNumber);
75 static void readAllParameters();
76 static void storeGCPars(int idx);
 // Sets the static counter utrcount back to 0.
77 static void resetModelCount(){utrcount = 0;};
 // Stores 'spacing' in the static member ttsSpacing (see its comment at the end of the class).
78 static void setTtsSpacing(int spacing){ ttsSpacing = spacing; };
79
 // Private helper functions (declarations only).
80private:
81 Double seqProb ( int left, int right, bool reverse, int type) const; // deprecated
82 static void computeLengthDistributions( );
83 static void fillTailsOfLengthDistributions( );
84 void process5InitSequence( const char* start, const char* end);
85 void process5Sequence( const char* start, const char* end);
86 void process3Sequence( const char* start, const char* end);
87 void processTssupSequence( const char* start, const char* end);
88 void buildProbabilities ( const AnnoSequence* annoseq );
89 void buildTSSModel( const AnnoSequence* annoseq );
90 void buildTTSModel( const AnnoSequence* annoseq );
 // reverseComplement defaults to false.
91 int findTATA(const char* seq, int maxpos, bool reverseComplement=false) const;
92 void processTSS(const char* start);
93 void initCountVars ( );
94 Double longIntronProb(int internalBegin, int internalEnd) const;
95 static void initSnippetProbs();
96 void decrementEndOfPred( int &endOfPred, list<int>::iterator &eopit, bool inCache);
97 void updatePossibleEOPs(list<int>::iterator &eopit, int endOfBioExon, bool &inCache);
 // NOTE(review): this second 'private:' label is redundant (access is already private);
 // it only separates the helper functions above from the data members below.
98private:
 // Per-instance (non-static) data members.
99 StateType utype; // returned by getStateType()
100 Integer gweight;
101 EOPList eop;
 // Everything from here to the end of the class is static, i.e. shared by all UtrModel objects.
102 static Integer utrcount; // checked by resetPars(), zeroed by resetModelCount()
103 static vector<Integer> utr5_emicount;
104 static vector<Integer> utr5init_emicount;
105 static vector<Integer> utr3_emicount;
106 static Double utr_patpseudo;
107 static PatMMGroup utr5_emiprobs;
108 static PatMMGroup *GCutr5_emiprobs;
109 static PatMMGroup utr5init_emiprobs;
110 static PatMMGroup *GCutr5init_emiprobs;
111 static PatMMGroup utr3_emiprobs;
112 static PatMMGroup *GCutr3_emiprobs;
113 static Integer utr5init_gesbasen;
114 static Integer utr5_gesbasen;
115 static Integer utr3_gesbasen;
116 static Integer k;
117 static double utr5patternweight; // old way: this is applied AFTER reading from the parameters file
118 static double utr3patternweight; // old way: this is applied AFTER reading from the parameters file
119 static double utr5prepatternweight; // for computing a mixture directly after HMM training and BEFORE writing down to parameter file
120 static double utr3prepatternweight; // for computing a mixture directly after HMM training and BEFORE writing down to parameter file
121 static vector<Integer> tssup_emicount;
122 static Double tssup_patpseudo;
123 static vector<Double> tssup_emiprobs;
124 static vector<Double> *GCtssup_emiprobs;
125 static Integer tssup_gesbasen;
126 static Integer tssup_k;
127 static vector<Integer> lenCount5Single; // Length count of single exons
128 static vector<Integer> lenCount5Initial; // Length count of initial exons
129 static vector<Integer> lenCount5Internal; // Length count of internal exons
130 static vector<Integer> lenCount5Terminal; // Length count of terminal exons
131 static vector<Double> lenDist5Single; // Length distribution of single exons
132 static vector<Double> lenDist5Initial; // Length distribution of initial exons
133 static vector<Double> lenDist5Internal; // Length distribution of internal exons
134 static vector<Double> lenDist5Terminal; // Length distribution of terminal exons
135 static vector<Double> tailLenDist5Single; // Tail probabilities of the length distribution of single exons
136 static vector<Integer> lenCount3Single; // Length count of single exons
137 static vector<Integer> lenCount3Initial; // Length count of initial exons
138 static vector<Integer> lenCount3Internal; // Length count of internal exons
139 static vector<Integer> lenCount3Terminal; // Length count of terminal exons
140 static vector<Double> lenDist3Single; // Length distribution of single exons
141 static vector<Double> lenDist3Initial; // Length distribution of initial exons
142 static vector<Double> lenDist3Internal; // Length distribution of internal exons
143 static vector<Double> lenDist3Terminal; // Length distribution of terminal exons
144 static vector<Double> tailLenDist3Single; // Tail probabilities of the length distribution of single exons
145 static vector<Double> tssProbsPlus; // to store tss probabilities
146 static vector<Double> tssProbsMinus; // to store tss probabilities
147 static Integer num5Single, num5Initial, num5Internal, num5Terminal, num5Introns;
148 static Integer numHuge5Single, numHuge5Initial, numHuge5Internal, numHuge5Terminal;
149 static Integer num3Single, num3Initial, num3Internal, num3Terminal, num3Introns;
150 static Integer numHuge3Single, numHuge3Initial, numHuge3Internal, numHuge3Terminal;
151 static Integer exonLenD; // use detailed length distribution up to this number
152 static Integer max_exon_length;
153 static Integer max3singlelength;
154 static Integer max3termlength;
155 static double slope_of_bandwidth; // for smoothing
156 static Integer minwindowcount; // see class Smooth in commontrain.hh
157 static Boolean hasLenDist;
158 static Integer tss_start;
159 static Integer tss_end;
160 static Integer tata_start;
161 static Integer tata_end;
162 static Integer tata_pseudocount;
163 static Integer d_tss_tata_min;
164 static Integer d_tss_tata_max;
165 static Motif *tssMotif; // motif of the transcription start site of tata-less promoters
166 static Motif *GCtssMotif;
167 static Motif *ttsMotif; // motif of the transcription termination site (downstream of polyA signal)
168 static Motif *GCttsMotif;
169 static Motif *tssMotifTATA; // motif of the transcription start site of tata promoters
170 static Motif *GCtssMotifTATA;
171 static Motif *tataMotif; // motif of the tata box (if existent)
172 static Motif *GCtataMotif;
173 // UTR intron related member variables
174 static vector<Integer> intron_emicount;
175 //static vector<Double> intron_emiprobs;
176 //static Integer intron_k; // order of the markov chain
177 // static SnippetProbs *rInitSnippetProbs5, *rSnippetProbs3, *intronSnippetProbs;
178 static SegProbs *initSegProbs5, *segProbs5, *rInitSegProbs5, *rSegProbs5, *rSegProbs3, *segProbs3, *intronSegProbs;
179 static bool initAlgorithmsCalled, haveSnippetProbs; // initAlgorithmsCalled is cleared by resetPars()
180 static vector<Integer> aataaa_count;
181 static vector<Double> aataaa_probs;
182 static int aataaa_boxlen;
183 static string polyasig_consensus;
184 static int d_polya_cleavage_min;
185 static int d_polya_cleavage_max;
186 static double prob_polya;
187 static int tts_motif_memory;
188 static double pUtr5Intron, pUtr3Intron, prUtr5Intron, prUtr3Intron;
189 static Double *ttsProbPlus, *ttsProbMinus;
190 static vector<Integer> distCountTata;
191 static int lastParIndex;
192 static int verbosity;
193 static int ttsSpacing; // without hints allow 3' end only every ttsSpacing bases for speed (set via setTtsSpacing())
194};
195
200public:
201 UtrModelError(string msg) : ProjectError(msg) {}
202};
203
204#endif // _UTRMODEL_HH
Definition gene.hh:548
Definition motif.hh:33
data structure to store possible endOfPred positions to iterate directly only over those endOfPred po...
Definition statemodel.hh:266
Hints on the gene structure.
Definition hints.hh:60
Definition gene.hh:351
This class implements a double object with a very large range.
Definition lldouble.hh:31
A simple matrix class. Base class for all mathematical matrix objects.
Definition matrix.hh:27
Definition motif.hh:92
Options lists are used for sampling; items also in backtracking.
Definition vitmatrix.hh:748
Definition merkmal.hh:148
Contains a vector of parameters. Is used in particular for intron emiprobs.
Definition merkmal.hh:80
Definition types.hh:449
ProjectError()
Definition types.hh:460
another class for caching probabilities of sequence segments
Definition statemodel.hh:232
This is the base interface class common to all state model classes (ExonModel, IntronModel,...
Definition statemodel.hh:65
Definition gene.hh:101
Definition utrmodel.hh:199
Untranslated Region Model.
Definition utrmodel.hh:19
void printProbabilities(int zusNumber, BaseCount *bc, const char *suffix=NULL)
Definition dummy.cc:26
void buildModel(const AnnoSequence *annoseq, int parIndex)
Definition dummy.cc:25
An array of Viterbi columns.
Definition vitmatrix.hh:687