Augustus 3.4.0
Loading...
Searching...
No Matches
namgene.hh
1/*
2 * namgene.hh
3 *
4 * License: Artistic License, see file LICENSE.TXT or
5 * https://opensource.org/licenses/artistic-license-1.0
6 */
7
8#ifndef _NAM_GENE_HH
9#define _NAM_GENE_HH
10#include <climits>
11
12// project includes
13#include "extrinsicinfo.hh" // SequenceFeatureCollection
14#include "vitmatrix.hh"
15#include "matrix.hh"
16#include "pp_scoring.hh"
17#include "statemodel.hh"
18
// Exception class derived from ProjectError; its only member is a constructor
// that passes msg through to the ProjectError base unchanged.
// NOTE(review): presumably the error type used by NAMGene (name-based guess;
// no throw site is visible in this listing) - confirm in namgene.cc.
23class NAMGeneError : public ProjectError {
24public:
25 NAMGeneError( string msg ) : ProjectError( msg ) { }
26};
27
// NAMGene: described in this page's index as "AUGUSTUS' entry point for the
// algorithms". It declares the Viterbi/sampling entry points and holds the
// forward and Viterbi matrices, the transition matrix and the state models.
// NOTE(review): the leading numbers on each line appear to be the documentation
// generator's line numbers; gaps in the numbering (e.g. 42 -> 47, 50 -> 57)
// mark lines that are absent from this listing.
34class NAMGene {
35public:
36 NAMGene();
37 ~NAMGene() {} // empty destructor body
38
39 void readModelProbabilities( int number = 1);
40 StatePath* getSampledPath(const char *dna, const char* seqname = NULL);
41 StatePath* getViterbiPath(const char *dna, const char* seqname = NULL);
42 StatePath* getTrainViterbiPath(const char *dna, SequenceFeatureCollection *sfc);
47 Transcript* doViterbiPiecewise(SequenceFeatureCollection& sfc, AnnoSequence *annoseq, Strand strand, unsigned int& num_pieces);
48
49 Double getEmissionProbability();
50
 // NOTE(review): the declaration line for the body below is absent from this
 // listing. The page index names "const ViterbiMatrixType & getForwardVariables()"
 // at namgene.hh:56, so presumably that accessor opens here - confirm against
 // the real header.
57 return forward;
58 }
59
 // NOTE(review): same situation as above; the page index names
 // "const ViterbiMatrixType & getViterbiVariables()" at namgene.hh:65.
66 return viterbi;
67 }
68
69 /*
70 * get the type corresponding to a state number
71 * @param i the number of a state, as in the properties
72 * @return the enumeration type
73 */
74 StateType getStateType(int i);
75
76 int getStateIndex(StateType type);
77 Double getPathEmiProb(StatePath *path, const char *dna, SequenceFeatureCollection& sfc, int countStart=-1, int countEnd=-1);
 // inline setter for the needForwardTable flag
78 void setNeedForwardTable(bool b){needForwardTable = b;}
79 // set the path and emiProbs for all annotations in annoseq (for option scoreTx)
80 void setPathAndProb(AnnoSequence *annoseq, FeatureCollection &extrinsicFeatures);
 // The next two store and return the list pointer itself in sampledTxs;
 // nothing is copied here.
81 void setAllTranscripts(list<Transcript*> *tl) {sampledTxs = tl;}
82 list<Transcript*> *getAllTranscripts() {return sampledTxs;}
 // public pass-through to the private prepareModels(dna, len)
83 void getPrepareModels(const char *dna, int len) {prepareModels(dna, len);}
84private:
91 void viterbiAndForward(const char* dna, bool useProfile=false);
92
93 /*
94 * repeatedly do the viterbi algorithm on pieces of dna, no strands
95 */
96 list<AltGene>* getStepGenes(AnnoSequence *annoseq, SequenceFeatureCollection& sfc, Strand strand, bool onlyViterbi=true);
97
98 list<AltGene>* findGenes(const char *dna, Strand strand, bool onlyViterbi=true);
99 int getNextCutEndPoint(const char *dna, int beginPos, int maxstep, SequenceFeatureCollection& sfc);
100 void readTransAndInitProbs( );
101 void readOvlpLenDist( );
102 void checkProbsConsistency( );
103 void computeReachableStates( );
104 void createStateModels( );
105 void setStatesInitialProbs();
106 void initAlgorithms(); // called for new DNA
107 void updateToLocalGCEach(int idx, int from = -1, int to = -1);
108 void printDPMatrix(); // print Viterbi matrix
 // also reachable from outside the class through getPrepareModels() above
113 void prepareModels(const char*dna, int len);
114
115 long tryFindCutEndPoint(StatePath *condensedExamPath, int examIntervalStart, int examIntervalEnd, list<Feature> *groupGaps, bool onlyInternalIR);
 // data members (this second access specifier repeats the one above)
116private:
 // forward and viterbi are the members returned by the two inline accessor
 // bodies in the public section
118 ViterbiMatrixType forward;
120 ViterbiMatrixType viterbi;
122 Matrix<Double> transitions;
124 vector<Double> initProbs;
126 vector<Double> termProbs;
127 // Whether the state is reachable at all with prob > 0
128 vector<Boolean> stateReachable;
130 vector<StateModel*> states;
132 Integer statecount;
134 vector<StateType> stateMap;
136 int lastParIndex;
138 Boolean proteinOutput;
139 Boolean codSeqOutput;
140 Boolean noInFrameStop;
141 double minmeanexonintronprob;
142 double minexonintronprob; // lower bound on probabilities of all exons and introns in the coding region
143 int maxtracks; // maximum reported number of transcripts at the same sequence position
144 int sampleiterations;
145 bool alternatives_from_sampling;
146 bool alternatives_from_evidence;
147 bool mea_prediction;
148 bool mea_eval;
149 bool needForwardTable; // written by setNeedForwardTable()
150 bool show_progress;
151 PP::SubstateModel* profileModel;
152 ContentStairs cs; // holds the local GC content class per position in the currently examined DNA
153 int curGCIdx; // current index of GC content class
154 list<Transcript*> *sampledTxs; // = alltranscripts stored for MultSpeciesMode
155};
156
157
158#endif // _NAM_GENE_HH
Definition gene.hh:548
holds the stepwise constant function of GC content class indices
Definition motif.hh:152
Definition extrinsicinfo.hh:314
This class implements a double object with a very large range.
Definition lldouble.hh:31
A simple matrix class. Base class for all mathematical matrix objects.
Definition matrix.hh:27
Definition namgene.hh:23
the class NAMGene is AUGUSTUS' entry point for the algorithms
Definition namgene.hh:34
const ViterbiMatrixType & getForwardVariables()
Definition namgene.hh:56
const ViterbiMatrixType & getViterbiVariables()
Definition namgene.hh:65
Transcript * doViterbiPiecewise(SequenceFeatureCollection &sfc, AnnoSequence *annoseq, Strand strand, unsigned int &num_pieces)
Definition namgene.cc:516
Definition types.hh:449
ProjectError()
Definition types.hh:460
holds all extrinsic feature information for one sequence
Definition extrinsicinfo.hh:86
A path through the Hidden-Markov-Model states.
Definition gene.hh:199
Definition gene.hh:250
An array of Viterbi columns.
Definition vitmatrix.hh:687
Definition pp_scoring.hh:136