Augustus 3.4.0
Loading...
Searching...
No Matches
baumwelch.hh
1/*
2 * baumwelch.hh
3 *
4 * License: Artistic License, see file LICENSE.TXT or
5 * https://opensource.org/licenses/artistic-license-1.0
6 */
7
8#ifndef _BAUMWELCH_HH
9#define _BAUMWELCH_HH
10
11// project includes
12#include "types.hh"
13#include "gene.hh"
14#include "contentmodel.hh"
15
16// standard C/C++ includes
17#include <iostream>
18
19
26public:
28 type = -1; // type usually not known
29 next = NULL;
30 name = NULL;
31 }
32
33 char *seq;
34 int seqLen;
35 int frame; // reading Frame of the first nucleotide
36 int type;
37 int weight;
38 char *name;
39 TrainingData *next;
40};
41
48class BaumWelch {
49public:
50
51 BaumWelch(int k, int numFrames, int modelTypes = 2){
52 this->k = k;
53 this->numFrames = numFrames;
54 this->modelTypes = modelTypes;
55 models = NULL;
56 modelTypeProbs = NULL;
57 inputSeqs = NULL;
58
59 }
60 void initialTypeParameters(Double patpseudocount = 0.0);
61 void initialRandomParameters();
62 void initialExonTrainingData(AnnoSequence *seqList);
63 void initialIntronTrainingData(const Gene* geneList);
64 void classifyTrainingData();
65 void setK(int k){
66 if (k != this->k) {
67 delete models;
68 models = NULL;
69 delete modelTypeProbs;
70 modelTypeProbs = NULL;
71 this->k = k;
72 }
73 };
74 void setTrainingData(TrainingData *inputSeqs){
75 if (this->inputSeqs) {
76 // TODO
77 }
78 this->inputSeqs = inputSeqs;
79 }
80 Double reestimate(Double patpseudocount = 0.0);
81 void printAllContentModels(ostream &out);
82 void readAllContentModels(istream &in);
83 void printInputSeqs(ostream &out);
84 TrainingData* getInputSeqs(){return inputSeqs;};
85 ContentModel getContentModel(int type);
86 int getK() { return k;}
87 int getModelTypes() { return modelTypes;}
88 int getNumFrames() { return numFrames;}
89 Double getModelTypeProb(int i) { return (*modelTypeProbs)[i];}
90 void addTrainingData(TrainingData *td){
91 td->next = inputSeqs;
92 inputSeqs = td;
93 numData++;
94 }
95private:
96 //Double seqProbUnderModel(Matrix<Double> &patprob, char *s, int len, int frame);
97
98private:
99 int k; // order of the HMM
100 int modelTypes; // number of different models
101 int numFrames; // number of different reading frames = periodicity
102 int numData; // number of input sequences
103
104 /* For each model type we have a matrix which holds in row f (f=0,1,2, frame)
105 * and row p (p = 1..4^(k+1), number of patrern) the probability of that pattern
106 * ending in reading frame f in the model of the respective type.
107 */
108 vector<ContentModel> *models;
109 vector<Double> *modelTypeProbs; // a priori probability of that model
110 TrainingData *inputSeqs;
111};
112
113
// Free-function declaration only; the definition is not part of this header.
// Takes an output stream, pointers to the content models and to their a priori
// probabilities, and an int k.
// NOTE(review): presumably prints the models for order k to `out` - confirm
// against the implementation, including whether NULL pointers are accepted.
114void printContentModels(ostream& out, vector<ContentModel> *models,
115 vector<Double> *modelTypeProbs, int k);
116
117#endif
Definition gene.hh:548
Implementation of a special case of Baum-Welch parameter estimation.
Definition baumwelch.hh:48
Model typical sequences by pattern frequencies.
Definition contentmodel.hh:28
Definition gene.hh:351
This class implements a double object with a very large range.
Definition lldouble.hh:31
a sequence together with some information
Definition baumwelch.hh:25