15#include "extrinsicinfo.hh"
// Builds a named group. The `parameters` pointer starts out as NULL and
// `parname` takes the supplied name.
52 MMGroup(string name)
{
    parameters = NULL;
    parname = name;
}
// Declaration only; the definition is not part of this listing.
// Takes the Parameters object and an alpha value that defaults to 1.0.
// NOTE(review): presumably registers this group with `parameters` — confirm in the .cc file.
58 void registerPars(
Parameters* parameters,
double alphavalue = 1.0);
// Pure virtual: each concrete group returns a textual description of the
// parameter at `index`. Parameters::print calls it with an index that is
// relative to the group's offset (index - getOffset()).
59 virtual string verbalDescription(
int index) = 0;
// Pure virtual: each concrete group returns a pointer to the Double that
// belongs to the parameter at `index`.
60 virtual Double* getFactor(
int index) = 0;
// Smoothing hook with an empty default implementation; subclasses may override it.
61 virtual void smooth()
{
    // nothing to do by default
}
// Declaration only. Takes a parameter index and a count that defaults to 1.
// NOTE(review): presumably `index` is relative to this group and the call is
// forwarded to the registered Parameters object — confirm in the definition.
62 void addCount(
int index,
int count=1);
// Returns the stored offset. Parameters::print treats it as the global index
// of this group's first parameter.
63 int getOffset()
{
    return this->offset;
}
// Stores `offset` in the member of the same name.
64 void setOffset(int offset)
{
    // `this->` is required because the parameter shadows the member.
    this->offset = offset;
}
// Replaces the group's name (`parname`) with `name`.
65 void setName(string name)
{
    parname = name;
}
// Returns the cached parameter count `numPars`. Virtual so that subclasses
// can recompute the count from their own containers.
66 virtual int getNumPars()
{
    return numPars;
}
// Refreshes `numPars` from the current length of `probs` and returns it.
82 int getNumPars()
{
    return numPars = probs.size();
}
// Declaration of this group's getFactor for the parameter at `index`
// (definition not shown in this listing).
85 Double* getFactor(
int index);
// Declaration of this group's textual description for the parameter at `index`.
86 string verbalDescription(
int index);
// Declaration only; takes a threshold `qthresh` and returns a Double.
// NOTE(review): the exact meaning of `qthresh` is not visible here — confirm in the definition.
87 Double getMinProb(
float qthresh);
// Refreshes `numPars` as the combined length of the three `probs` vectors
// and returns it.
101 int getNumPars()
{
    vector<Double>::size_type total = 0;
    for (int f = 0; f < 3; f++)
        total += probs[f].size();
    numPars = total;
    return numPars;
}
// Declaration only. `frame` and `pattern` are output references.
// NOTE(review): presumably the inverse of getIndex(frame, pattern) — confirm in the definition.
104 void getFramePat(
int index,
int &frame,
int &pattern);
// Maps a (frame, pattern) pair to a flat index: frame * probs[0].size() + pattern.
// The length of probs[0] is used as the stride for every frame.
105 inline int getIndex(int frame, int pattern)
{
    const vector<Double>::size_type stride = probs[0].size();
    return frame * stride + pattern;
}
// Declaration of this group's getFactor for the parameter at `index`
// (definition not shown in this listing).
106 Double* getFactor(
int index);
// Declaration of this group's textual description for the parameter at `index`.
107 string verbalDescription(
int index);
// Three parameter vectors; getNumPars() sums their lengths and getIndex()
// uses probs[0].size() as the per-frame stride.
110 vector<Double> probs[3];
// Returns the object to its empty state: no bins and no stored
// probabilities, boundaries or averages.
124 void reset()
{
    origprobs.clear();
    bb.clear();
    avprobs.clear();
    nbins = 0;
}
// Drops the collected original probabilities; all other members are left untouched.
125 void removeOrigs()
{
    origprobs.clear();
}
// Appends one probability to `origprobs`.
126 void addProb(Double p)
{
    origprobs.push_back(p);
}
// Declaration only; takes an upper limit on the number of bins.
// NOTE(review): presumably derives the bins from the values collected via addProb() — confirm.
127 void trainBins(
int maxNumBins);
// Declaration only; no definition is visible in this listing.
129 void printBoundaries();
// Maps a probability to its bin value: when bins exist, returns the entry of
// `avprobs` selected by getIndex(p); with no bins (nbins == 0) returns `p` unchanged.
131 Double factor(Double p)
{
    if (nbins != 0)
        return avprobs[getIndex(p)];
    return p;
}
// Declaration only; takes the output stream to write to.
132 void write(ostream &out);
// Declaration only; takes the input file stream to read from.
// NOTE(review): presumably the counterpart of write() — confirm the format in the definition.
133 void read(ifstream &in);
// Declaration only; takes a threshold `qthresh` and returns a Double.
134 Double getMinProb(
float qthresh);
// Declaration of this group's getFactor for the parameter at `index`.
136 Double* getFactor(
int index);
// Declaration of this group's textual description for the parameter at `index`.
137 string verbalDescription(
int index);
// Smoothing for this group is delegated entirely to monotonify().
138 void smooth()
{
    monotonify();
}
// Refreshes `numPars` from the current length of `avprobs` and returns it.
139 int getNumPars()
{
    return numPars = avprobs.size();
}
// Values collected through addProb(); cleared by reset() and removeOrigs().
143 vector<Double> origprobs;
// Values returned by factor() once bins exist; its length is the parameter
// count reported by getNumPars().
145 vector<Double> avprobs;
151 weights.reserve(5000);
152 counts.reserve(5000);
// Declaration only; takes a group and an alpha value (default 1.0) and returns an int.
// NOTE(review): the meaning of the return value is not visible here — confirm in the definition.
158 int addMMGroup(
MMGroup* mmgroup,
double alphavalue = 1.0);
// Declaration only; takes a pointer to a single Double parameter and an
// alpha value (default 1.0). "Merkmal" is German for "feature".
159 void addMerkmal(
Double* parptr,
double alphavalue = 1.0);
// Adds `count` (default 1) to the counter stored at position `index`.
// The index is not range-checked.
160 void addCount(int index, int count=1)
{
    counts[index] = counts[index] + count;
}
// Sets every entry of `counts` to zero while keeping the number of entries.
161 void resetCounts()
{
    // assign() replaces the former int-indexed loop, which compared a signed
    // index against the unsigned counts.size().
    counts.assign(counts.size(), 0);
}
// Declaration only; takes a vector of doubles by non-const reference.
// NOTE(review): presumably adds `h` element-wise to the weights — confirm.
162 void addWeights(vector<double> &h);
// Declaration only; takes a vector of doubles by non-const reference.
// NOTE(review): presumably replaces the weights with `v` — confirm.
163 void setWeights(vector<double> &v);
// Declaration only; no definition is visible in this listing.
164 void smoothFeatures();
// Declaration only; no definition is visible in this listing.
166 void updateWeights();
// Returns the weight stored at position `i`. The index is not range-checked.
167 double getWeight(int i)
{
    const double w = weights[i];
    return w;
}
// Returns the alpha value stored at position `i`. The index is not range-checked.
168 double getAlpha(int i)
{
    const double a = alpha[i];
    return a;
}
// Declaration only; returns a textual description for the parameter at `index`.
// CRF::compareWeights calls it with an index into the weight vectors.
169 string verbalDescription(
int index);
// Convenience overload: prints using the object's own integer `counts`
// by forwarding to the templated print.
170 void print(int numprint, bool countsOnly)
{
    print<int>(counts, numprint, countsOnly);
}
172 void print(vector<T> &counts,
int numprint,
bool countsOnly);
// Returns a copy of the count vector.
175 vector<int> getCounts()
{
    vector<int> snapshot(counts);
    return snapshot;
}
// Returns the number of weights, converted to int.
176 int size()
{
    const int n = weights.size();
    return n;
}
// Returns a copy of the weight vector.
177 vector<double> getWeights()
{
    vector<double> snapshot(weights);
    return snapshot;
}
// One weight per parameter; read by getWeight(), getWeights() and size().
179 vector<double> weights;
// One alpha value per parameter; read by getAlpha() and printed as "alpha=" in print().
181 vector<double> alpha;
// Pointer to a vector of per-parameter Double pointers; print() dereferences
// (*parptrs)[index] for the "p=" column.
182 vector<Double*> *parptrs;
// Pointer to a vector of groups; print() walks it to find the group that
// covers a given parameter index.
183 vector<MMGroup*> *mmgroups;
187void Parameters::print(vector<T> &counts,
int numprint,
bool countsOnly){
189 double weightsum = 0.0;
191 cout <<
"[CRF parameters]: ";
193 cout <<
" empty " << endl;
197 cout << weights.size() <<
" parameters" << endl;
199 MMGroup* mmgroup = (*mmgroups)[grpidx];
200 for (
int index=0; index < weights.size(); index++){
201 if (numprint > 0 && index == numprint/2 && weights.size() > numprint)
202 cout <<
"..." << endl;
203 if (numprint < 0 || index < numprint/2 || weights.size()-index <= numprint/2) {
204 cout << index <<
"\t";
205 while(mmgroup->getOffset() + mmgroup->getNumPars() <= index && grpidx < mmgroups->size()-1){
207 mmgroup = (*mmgroups)[grpidx];
209 if (grpidx >= mmgroups->size() || mmgroup->getOffset() > index){
212 cout << left << setw(30) << mmgroup->verbalDescription(index - mmgroup->getOffset()) <<
"\t";
213 cout <<
"alpha=" << alpha[index];
214 cout <<
"\tc=" << counts[index];
216 cout <<
"\tw=" << setprecision(6) << weights[index] <<
"\tp=" << *(*parptrs)[index];
219 countsum += counts[index];
220 weightsum += weights[index];
222 cout <<
"***\t" << setw(30) <<
"average over all features" <<
"\t";
223 cout <<
"c=" << countsum;
225 cout <<
"\tw=" << weightsum/weights.size() <<
"\tp=" << LLDouble::exp(weightsum/weights.size());
// Declaration only. Takes the starting parameters, the training genes (by value)
// and the extrinsic feature collection (by reference).
238 static void onlineLargeMarginTraining(
Parameters* startPars, vector<AnnoSequence*> trainGenes,
FeatureCollection &extrinsicFeatures);
// Declaration only, with the same parameter list as onlineLargeMarginTraining.
239 static void improvedIterativeScaling(
Parameters* startPars, vector<AnnoSequence*> trainGenes,
FeatureCollection &extrinsicFeatures);
// Declaration of the weight-comparison helper. U and V are the element types
// of the start and end weight vectors; numPrint defaults to 100 and caps how
// many of the largest changes are reported.
240 template <
class U,
class V>
241 static void compareWeights(
Parameters* parameters, vector<U> &startWeights, vector<V> &endWeights,
int numPrint=100);
// Replaces the static list `evalAnnoSeqs` with a copy of `seqs`.
243 static void setEvalAnnoSeqs(vector<AnnoSequence*> seqs)
{
    evalAnnoSeqs = seqs;
}
245 static void setPrintPars(
int ostatecount,
StateModel **ostates){
246 statecount = ostatecount;
// Declaration only; takes a vector of doubles by value and returns a vector of doubles.
// NOTE(review): how outliers are detected and capped is not visible here — confirm in the definition.
250 static vector<double> capOutliers(vector<double> bs);
// Static list of annotated sequences; assigned by setEvalAnnoSeqs().
251 static vector<AnnoSequence*> evalAnnoSeqs;
// Static state count; assigned from `ostatecount` in setPrintPars().
253 static int statecount;
262 bool operator()(
const T * lhs,
const T * rhs)
const {
267template <
class U,
class V>
268void CRF::compareWeights(
Parameters* parameters, vector<U> &startWeights, vector<V> &endWeights,
int numPrint){
270 if (n > startWeights.size())
271 n = startWeights.size();
273 cout <<
"sorted list of the " << n <<
" most significant changes." << endl;
274 vector<double> absDiffs;
275 absDiffs.reserve(startWeights.size());
276 for (
int i=0; i < startWeights.size(); i++)
277 absDiffs.push_back(abs(startWeights[i] - endWeights[i]));
279 std::vector<const double *> pointer;
280 pointer.reserve(absDiffs.size());
281 const double *
const start = &absDiffs[0];
282 const double *
const end = start + absDiffs.size();
284 for (
const double * iter = start; iter != end; ++iter)
285 pointer.push_back(iter);
289 for (
int i = pointer.size()-1; i >= pointer.size()-n && i >= 0; i--) {
290 const double * p = pointer[i];
292 if (endWeights[idx] != startWeights[idx]){
293 cout << parameters->verbalDescription(idx) <<
"\t" << startWeights[idx] <<
" --> " << endWeights[idx]
294 <<
" diff= " << endWeights[idx] - startWeights[idx] <<
"\t" << setprecision(4) << LLDouble::exp(endWeights[idx] - startWeights[idx]) << endl;
Contains features for bins of a probability (or a score).
Definition merkmal.hh:120
implements functions for training Conditional Random Fields
Definition merkmal.hh:235
Definition extrinsicinfo.hh:314
Contains a vector of parameters for each frame. It is used in particular for exon emiprobs.
Definition merkmal.hh:98
This class implements a double object with a very large range.
Definition lldouble.hh:31
MMGroup (MM = Merkmal = Feature)
Definition merkmal.hh:50
Definition merkmal.hh:148
Contains a vector of parameters. It is used in particular for intron emiprobs.
Definition merkmal.hh:80
This is the base interface class common to all state model classes (ExonModel, IntronModel, ...).
Definition statemodel.hh:65
Definition merkmal.hh:260