Augustus 3.4.0
Loading...
Searching...
No Matches
statemodel.hh
1/*
2 * statemodel.hh
3 *
4 * License: Artistic License, see file LICENSE.TXT or
5 * https://opensource.org/licenses/artistic-license-1.0
6 */
7
8#ifndef _STATEMODEL_HH
9#define _STATEMODEL_HH
10
11// project includes
12#include "matrix.hh"
13#include "vitmatrix.hh"
14#include "merkmal.hh"
15
16
17
18/*
19 * constants used in splitting into state/substate pairs
20 * we use the lower byte for the state, and the upper 3 for the substate
21 * SHIFT_LEFT is used to build the full id from the substate id
22 * SHIFT_RIGHT is used to retrieve back the substate id
23 */
24#define SHIFT_SIZE (sizeof (signed char)*8)
25#define SHIFT_LEFT(x) ((x) << SHIFT_SIZE)
26#define SHIFT_RIGHT(x) ((x) >> SHIFT_SIZE)
27#define MAX_STATECOUNT SHIFT_LEFT(1)
28
29
30/*
31 * getFullStateId: merge state/substate pairs into single integers (used in calls of viterbi)
32 * precondition: state >= -1, substate >= -1
33 * if substate is -1 (none), state remains unchanged
34 * getStatePair: split integers back into state/substate pairs
35 *
36 */
37inline int getFullStateId(int state, SubstateId substate = SubstateId()) {
38 return SHIFT_LEFT (substate.fullId()) + state;
39}
40inline void getStatePair(int fullState, int& state, SubstateId& substate) {
41 state = (signed char)(fullState);
42 substate.set(SHIFT_RIGHT(fullState -state));
43}
44
51struct Ancestor{
52 Ancestor(int newpos=0, Double newval=0.0) : pos(newpos), val(newval) {}
53 int pos;
54 Double val;
55};
56
66protected:
67 StateModel() {} // do not create StateModel object
68public:
69 void initPredecessors(Matrix<Double>&, int self);
70
71 // virtual methods to be implemented by the specialised classes
72 virtual void registerPars(Parameters* parameters) {}
73 virtual void buildModel(const AnnoSequence* annoseq, int parIndex) =0;
74 virtual void printProbabilities(int zusNumber=1, BaseCount *bc = NULL, const char* suffix = NULL) =0;
75 virtual StateType getStateType() const = 0;
76 virtual void viterbiForwardAndSampling(ViterbiMatrixType&, ViterbiMatrixType&, int, int,
77 AlgorithmVariant, OptionListItem&) = 0;
78 virtual Double emiProbUnderModel(int , int) const = 0;
79 virtual void initAlgorithms(Matrix<Double>&, int) = 0;
80 virtual void updateToLocalGCEach(Matrix<Double>& trans, int cur) { };
81 virtual ~StateModel() {} // nothing to do here since class is purely abstract
82
83 // class functions
84 static void init();
85 static StateModel* newStateModelPtr (const char* path);
86 static void determineShortPatterns(const vector<Integer>& patcounts, int k, int minCount);
87 static void makeProbsFromCounts(vector<Double > &patprobs , const vector<Integer > &patcounts,
88 int k, Double pseudocount, Boolean shorten = false);
89 static void computeEmiFromPat(const vector<Double>& patprobs, vector<Double>& emiprobs, Integer k);
90 static void prepareViterbi(const char* dna, int len, const vector<StateType> &stateMap);
91 static void readProbabilities(int);
92 static void resetPars();
93 // this is done when gcIdx changes once per state class
94 static void updateToLocalGC(int idx, int from = -1, int to = -1);
95 static void readAllParameters();
96 static void storeGCPars(int);
97 static void resetModelCounts();
98 static bool isPossibleDSS(int pos) {
99 return pos >= 1 && pos <= dnalen-2 &&
100 (onGenDSS(sequence + pos) ||
101 seqFeatColl->isHintedDSS(pos, plusstrand));
102 }
103 static bool isPossibleRDSS(int pos) {
104 return pos >= 1 && pos <= dnalen-2 &&
105 (onGenRDSS(sequence + pos -1) ||
106 seqFeatColl->isHintedDSS(pos, minusstrand));
107 }
108 static bool isPossibleASS(int pos) {
109 return pos >= 1 && pos <= dnalen-2 &&
110 (onASS(sequence + pos -1) ||
111 seqFeatColl->isHintedASS(pos, plusstrand));
112 }
113 static bool isPossibleRASS(int pos) {
114 return pos >= 1 && pos <= dnalen-2 &&
115 (onRASS(sequence + pos) ||
116 seqFeatColl->isHintedASS(pos, minusstrand));
117 }
118 static void setSFC(SequenceFeatureCollection *psfc) {
119 seqFeatColl = psfc;
120 }
121 static void setPP(PP::SubstateModel* mdl) {
122 profileModel = mdl;
123 }
124 static void setCountRegion(int from, int to){countStart = from; countEnd = to;}
125 static int getActiveWindowStart(int);
126 static void setGCIdx(int idx) {gcIdx = idx;}
127 static void setContentStairs(ContentStairs *stairs) {cs = stairs;}
128 static int getGCIdx(int at){if (cs) return cs->idx[at]; else return -1;}
129protected:
130 // variable unique to each model
131 vector<Ancestor> ancestor; // predecessor in the state transition graph
132
133 // class variables shared by all models
134 static const vector<StateType>* stateMap; // needed in exonmodel
135 static const char* sequence; // the sequence currently examined
136 static int dnalen;
137 static SequenceFeatureCollection* seqFeatColl;
138 static vector<Boolean>* shortpattern;
139 static PP::SubstateModel* profileModel;
140 static int countStart, countEnd; // this is the range of positions over which features are counted in CRF training
141 static int activeWinLen; // states ending before the active window will be deleted if they are not yet used
142 static int gcIdx; // GC content class index for all states
143 static ContentStairs *cs;
144}; // class StateModel
145
146
155public:
156 SnippetListItem (Double p, int length) { prob = p; len = length; next = NULL;}
158 if (next)
159 delete next;
160 }
161 Double prob;
162 int len;
163 SnippetListItem *next;
164};
165
171public:
172 Double getProb(int base, int &partlen);
173 SnippetList() {first = last = NULL;}
174 ~SnippetList(){}
175 SnippetListItem *first, *last;
176};
177
183public:
184 SnippetProbs(const char* dna, int k, bool forwardStrand=true){
185 this->dna = dna;
186 this->k = k;
187 this->forwardStrand = forwardStrand;
188 if (dna)
189 n = strlen(dna);
190 else
191 n = 0;
192 snippetlist = new SnippetList*[n];
193 for (int i=0; i<n; i++)
194 snippetlist[i] = NULL;
195 }
196 Double getSeqProb(int base, int len);
197 ~SnippetProbs () {
198 if (snippetlist) {
199 for (int i=0; i<n; i++) {
200 if (snippetlist[i]){
201 delete snippetlist[i]->first;
202 delete snippetlist[i];
203 }
204 }
205 delete [] snippetlist;
206 }
207 }
208 void setEmiProbs(vector<Double> *emiprobs) {
209 this->emiprobs = emiprobs;
210 }
211
212 void addProb(int base, int len, Double p);
213
214protected:
215 const char *dna;
216 int n;
217 vector<Double> *emiprobs;
218 int k;
219 SnippetList **snippetlist;
220 bool forwardStrand;
221private:
222 Double getElemSeqProb(int base, int len);
223};
224
232class SegProbs {
233public:
234 SegProbs(const char* dna, int k, bool forwardStrand=true) : s2i(k+1) {
235 this->dna = dna;
236 this->k = k;
237 this->forwardStrand = forwardStrand;
238 if (dna)
239 n = strlen(dna);
240 else
241 n = 0;
242 cumProds = new Double[n+1];
243 }
244 ~SegProbs () { delete [] cumProds; }
245 void setEmiProbs(vector<Double> *emiprobs, int from = -1, int to = -1);
246 Double getSeqProb(int from, int to);
247 int getN() { return n; }
248protected:
249 const char *dna;
250 int n;
251 int k;
252 Seq2Int s2i;
253 vector<Double> *emiprobs;
254 Double *cumProds;
255 bool forwardStrand;
256};
257
266class EOPList {
267public:
268 void clear() { possibleEndOfPreds.clear(); }
269 void init() {
270 eopit = possibleEndOfPreds.begin();
271 inCache = false;
272 }
273 void decrement(int &endOfPred);
274 void update(int endOfPred);
275 list<int> possibleEndOfPreds;
276 list<int>::iterator eopit;
277 bool inCache;
278};
279
280
281#endif // _STATEMODEL_HH
Definition gene.hh:548
Definition motif.hh:33
holds the stepwise constant function of GC content class indices
Definition motif.hh:152
data structure to store possible endOfPred positions to iterate directly only over those endOfPred po...
Definition statemodel.hh:266
This class implements a double object with a very large range.
Definition lldouble.hh:31
A simple matrix class. Base class for all mathematical matrix objects.
Definition matrix.hh:27
Options lists are used for sampling; items also in backtracking.
Definition vitmatrix.hh:748
Definition merkmal.hh:148
another class for caching probabilities of sequence segments
Definition statemodel.hh:232
a class for converting sequence into integer replacing Base4Int
Definition geneticcode.hh:163
holds all extrinsic feature information for one sequence
Definition extrinsicinfo.hh:86
intelligently store and retrieve the sequence emission probabilities of the sequence from a to b for ...
Definition statemodel.hh:154
Definition statemodel.hh:170
Definition statemodel.hh:182
This is the base interface class common to all state model classes (ExonModel, IntronModel,...
Definition statemodel.hh:65
static StateModel * newStateModelPtr(const char *path)
Definition statemodel.cc:72
An array of Viterbi columns.
Definition vitmatrix.hh:687
Predecessor in the state transition Graph.
Definition statemodel.hh:51
Definition pp_scoring.hh:136
Definition vitmatrix.hh:33